#!/usr/bin/env python ################################################################################ # plotrate.py: script for plotting average rate of tmix trace files # 2008-04-16 # # Copyright (c) 2008, Tom Quetchenbach # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, # this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in the # documentation and/or other materials provided with the distribution. # * Neither the name of the California Institute of Technology nor the names # of its contributors may be used to endorse or promote products derived # from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ################################################################################ # this script analyzes the data rate of a tmix trace. See the file tmix_utils.py # for full documentation of the input format. 
# This script is a bit more detailed than the other one because it takes into
# account the pause times in the connections, whereas the other does not.
#
# this script does not actually produce a plot, but the output is suitable for
# use with gnuplot (http://www.gnuplot.info/).
#
# assumes input is sorted tmix connection vectors. They must be sorted by start
# time, or this script will not work.
#
# No real attempt at memory or speed optimization is made here.

from __future__ import division

import math
import optparse
import signal
import sys

# Number of trace time units in one second (times are divided by this before
# being printed, and the averaging interval in seconds is multiplied by it).
SEC = 1000000

# Commands that start a new connection vector; their first argument is the
# connection start time.
HEADERS = ('SEQ', 'CONC')

# Pause commands. Each one advances its own clock within the connection:
# 't' for sequential connections, 't>' / 't<' for the transmit / receive side
# of concurrent connections.
PAUSES = ('t', 't>', 't<')

# Data commands -> (pause clock they are timed against, flavour, direction).
DATA = {
    '>': ('t', 'sequential', 'tx'),
    '<': ('t', 'sequential', 'rx'),
    'c>': ('t>', 'concurrent', 'tx'),
    'c<': ('t<', 'concurrent', 'rx'),
}

# Set by the SIGINT handler; process_trace() checks it once per input line.
end_now = False


def insert(bytes, t, tx, rx):
    """Add tx/rx byte counts to the entry for time t.

    bytes maps a time to a (tx, rx) tuple and is updated in place. (The
    parameter name shadows the builtin of the same name; it is kept so that
    existing callers are unaffected.)

    Returns True if an entry for t already existed, False if it was created.
    """
    existed = t in bytes
    if existed:
        otx, orx = bytes[t]
        tx, rx = tx + otx, rx + orx
    bytes[t] = (tx, rx)
    return existed


def interrupt(sig, frame):
    """Signal handler: ask the main loop to stop and print its results."""
    global end_now
    end_now = True


def parse_value(text):
    """Convert a trace argument to int, or to float if it is not an integer.

    Raises ValueError if text is neither.
    """
    try:
        return int(text)
    except ValueError:
        return float(text)


def safe_div(num, den):
    """Return num / den, or 0.0 when den is zero (e.g. for an empty trace)."""
    return num / den if den else 0.0


def decade(secs):
    """Return the power-of-ten bucket of a positive duration.

    floor() is used rather than int() so that e.g. 0.5 lands in 10^-1 and
    not, together with 5.0, in 10^0.
    """
    return int(math.floor(math.log10(secs)))


def flush_report(pending, cutoff, last_report, out):
    """Print one rate line for the pending transfers older than cutoff.

    pending maps time -> (tx, rx) bytes; reported entries are removed from it.
    A cutoff of None means "everything" (used at the end of the trace). The
    line holds the time of the newest reported transfer (in seconds) and the
    tx and rx rates over the span since last_report, in bits per trace time
    unit.

    Returns (new_last_report, tx_bytes, rx_bytes). If there is nothing to
    report, or the newest entry is not later than last_report (no time span
    to average over), nothing is printed or removed and
    (last_report, 0, 0) is returned.
    """
    due = sorted(t for t in pending if cutoff is None or t < cutoff)
    if not due or due[-1] <= last_report:
        return last_report, 0, 0

    newest = due[-1]
    xmit_sum = recv_sum = 0
    for t in due:
        xmit, recv = pending.pop(t)
        xmit_sum += xmit
        recv_sum += recv

    delta = newest - last_report
    out.write('%s %s %s\n' % (newest / SEC,
                              xmit_sum * 8 / delta,
                              recv_sum * 8 / delta))
    out.flush()
    return newest, xmit_sum, recv_sum


def process_trace(lines, interval=100.0, ignore_time=False, out=None,
                  err=None, verbose=False):
    """Read connection vectors and print the averaged data rates.

    lines       -- iterable of trace lines, sorted by connection start time
    interval    -- averaging interval in seconds
    ignore_time -- if true, pause commands do not advance the clocks
    out         -- stream for the rate table (default sys.stdout)
    err         -- stream for diagnostics (default sys.stderr)
    verbose     -- if true, write a message to err for every command handled

    A rate line is printed when a connection starts more than `interval`
    after the previous report, and once more at the end of the input for
    everything still pending. Blank lines, comment lines, commands this
    script does not use, and pause/data commands that appear before the
    first connection header are skipped. Reading stops early once the
    module-level flag end_now is set.

    Returns (cvecs, xmit_all, recv_all, last_report, cv_stats), where
    cv_stats holds one (duration_in_seconds, total_bytes) tuple per
    connection.
    """
    if out is None:
        out = sys.stdout
    if err is None:
        err = sys.stderr

    def debug(fmt, when):
        if verbose:
            err.write(fmt % when + '\n')

    interval_ticks = interval * SEC
    pending = {}            # time -> (tx, rx) bytes not yet reported
    cv_stats = []
    last_report = 0
    xmit_all = recv_all = 0
    cvecs = 0

    # state of the connection currently being read
    in_cvec = False
    base_time = 0
    offsets = dict.fromkeys(PAUSES, 0)
    max_offset = 0
    total_size = 0

    out.write('# time tx_rate rx_rate\n')

    for line in lines:
        if end_now:
            # keyboard interrupt received
            err.write('Received interrupt\n')
            break

        parts = line.split()
        if not parts or parts[0].startswith('#'):
            # blank line or comment
            continue
        cmd = parts[0]
        if cmd not in HEADERS and cmd not in offsets and cmd not in DATA:
            # a command this script does not use
            continue
        value = parse_value(parts[1])

        if cmd in HEADERS:
            # new connection: file the previous one, then start afresh
            if in_cvec:
                cv_stats.append((max_offset / SEC, total_size))
            in_cvec = True
            base_time = value
            offsets = dict.fromkeys(PAUSES, 0)
            max_offset = 0
            total_size = 0
            cvecs += 1
            debug('new cvec at %d', base_time)

            # Transfers are only final once they are older than the start of
            # the current connection, and base_time only changes here, so
            # this is the only place a report can become due.
            if base_time - last_report > interval_ticks:
                last_report, xmit, recv = flush_report(
                    pending, base_time, last_report, out)
                xmit_all += xmit
                recv_all += recv
        elif not in_cvec:
            # pause or data before any connection header: nothing to attach
            # it to
            continue
        elif cmd in offsets:
            # connection sleep time
            if not ignore_time:
                offsets[cmd] += value
            debug('sleep until %d', base_time + offsets[cmd])
        else:
            # transmit or receive
            clock, flavour, direction = DATA[cmd]
            when = base_time + offsets[clock]
            if direction == 'tx':
                insert(pending, when, value, 0)
            else:
                insert(pending, when, 0, value)
            max_offset = max(max_offset, offsets[clock])
            total_size += value
            debug('new ' + flavour + ' ' + direction + ' at %d', when)

    # End of input: no later connection can add to what is pending, so
    # report all of it, and file the connection that was still open.
    last_report, xmit, recv = flush_report(pending, None, last_report, out)
    xmit_all += xmit
    recv_all += recv
    if in_cvec:
        cv_stats.append((max_offset / SEC, total_size))

    return cvecs, xmit_all, recv_all, last_report, cv_stats


def write_summary(cvecs, xmit_all, recv_all, last_report, cv_stats, err=None):
    """Write the overall averages and a histogram of connection times.

    The arguments are the values returned by process_trace(). Connections
    are grouped by the power of ten of their duration in seconds, with
    zero-length connections listed separately; every decade from the
    shortest to the longest one seen is printed. Averages and percentages
    whose denominator is zero are printed as 0.

    Output goes to err (default sys.stderr).
    """
    if err is None:
        err = sys.stderr

    all_cv = len(cv_stats)
    timed = [(x, s) for x, s in cv_stats if x > 0]
    total_dur = sum(x for x, s in timed)
    total_size = sum(s for x, s in cv_stats)
    zero_size = sum(s for x, s in cv_stats if x == 0)
    zero_count = all_cv - len(timed)

    report = [
        '%d cvecs; Average rate %g %g' % (
            cvecs,
            safe_div(xmit_all * 8, last_report),
            safe_div(recv_all * 8, last_report)),
        'Average connection duration %g' % safe_div(total_dur, all_cv),
        'Connection duration histogram:',
        'secs flows percent bytes percent',
        'zero: %- 10d (%8.5f%%) %- 15d (%.5f%%)' % (
            zero_count, safe_div(zero_count, all_cv) * 100,
            zero_size, safe_div(zero_size, total_size) * 100),
    ]

    # decade -> (number of connections, bytes)
    buckets = {}
    for x, s in timed:
        i = decade(x)
        count, size = buckets.get(i, (0, 0))
        buckets[i] = (count + 1, size + s)

    if buckets:
        # + 1 so that the decade of the longest connection is included
        for i in range(min(buckets), max(buckets) + 1):
            count, size = buckets.get(i, (0, 0))
            report.append('10^% 2d: %- 10d (%8.5f%%) %- 15d (%.5f%%)' % (
                i, count, safe_div(count, all_cv) * 100,
                size, safe_div(size, total_size) * 100))

    err.write('\n'.join(report) + '\n')


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    argv defaults to sys.argv; argv[0] is the program name and the first
    positional argument is the trace file.
    """
    if argv is None:
        argv = sys.argv

    parser = optparse.OptionParser()
    parser.set_usage('%prog [options] trace_file > plot_file')
    parser.add_option('-i', '--interval', dest='interval', action='store',
                      help='Set the averaging interval to SECS (default 100)',
                      metavar='SECS', type='float', default=100.0)
    parser.add_option('-x', '--ignore-time', dest='ignore_time',
                      action='store_true',
                      help='Ignore connection pause times')
    options, args = parser.parse_args(argv)

    if len(args) < 2:
        parser.print_help()
        return 1
    try:
        f = open(args[1], 'r')
    except IOError as e:
        sys.stderr.write('Error opening file %s %s\n' % (args[1], e))
        return 1

    # Only catch Ctrl-C when the table goes to a terminal; the handler makes
    # the run stop early and still print its summary.
    if sys.stdout.isatty():
        signal.signal(signal.SIGINT, interrupt)

    try:
        # readline() rather than file iteration, so lines are handled as
        # they are read
        stats = process_trace(iter(f.readline, ''), options.interval,
                              options.ignore_time)
    finally:
        f.close()

    write_summary(*stats)
    return 0


if __name__ == '__main__':
    sys.exit(main())