#!/usr/bin/env python
#
# File: nadq_plot.py
# Version: 5
# Date: 14 Jul 2011
# Description: Plots disk qual charts from NetApp disk_qual console output
# Usage: nadq_plot.py [-h] [-i] [-d <disk_name>] [-n <note>] [-f <format>]
#                     [-o <outdir>] <console_file>
# Example: nadq_plot.py 39_NADQ02_SAS_AZCS_DskQul_Perf_hdd-177.console
#
# The code below avoids Python 2-only syntax so that it runs unchanged on
# Python 2.6+ and Python 3.

import getopt
import os
import re
import string
import sys
from collections import deque

##
# EXTERNAL DEPENDENCIES
#
# See http://matplotlib.sourceforge.net/ for details on library used for
# plotting results.  The import is guarded so that the parsing classes can
# be used without matplotlib; plotting itself still requires it (see
# plot_file_writer.plot() and main()).
try:
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
except ImportError:
    matplotlib = None
    plt = None

##
# GLOBALS
#

# Set to True to print additional debug statements
glob_debug = False

# Name of this application from basename(sys.argv[0])
glob_arg_app_name = None

# A friendly disk name to be used in the charts
glob_arg_disk_friendly_name = None

# A note to be added to the charts
glob_arg_run_note = None

# Output files format
glob_arg_output_fmt = "png"

# By default later versions of disk_qual report iops for AZCS proximal
# test cases in client iops, but older versions report in disk iops.
# When True (-i), iops of AZCS test cases are halved (see
# disk_qual_test_result.get_iops()).
glob_arg_azcs_in_disk_iops = False

# Directory where output files are created
glob_arg_output_dir = "."

# Set of disks parsed from the input file hashed on disk_id (e.g., 3d.00.7)
glob_disks_tbl = dict()

# Set to the kernel version string (e.g., NetApp Release
# RboilermakerN_110622_0300 7-Mode: Wed Jun 22 03:24:07 PDT 2011)
glob_kernel_version = None


##
# FUNCTIONS
#

def usage_exit(exit_code=0):
    """Print the program usage and terminate with exit_code."""
    usage_lines = [
        "Usage: %s [-h] [-i] [-d <disk_name>] [-n <note>] [-f <format>] \\"
        % glob_arg_app_name,
        "       [-o <outdir>] <console_file>",
        "Plots disk qual charts from NetApp disk_qual console output",
        "-h              displays program usage and exits",
        "-i              convert iops reported for AZCS proximal",
        "                test cases from disk iops to client iops",
        "-d <disk_name>  specifies a friendly name to be used for the",
        "                disk in the resulting plots",
        "-n <note>       specifies a note in the run charts",
        "-f <format>     specifies the output format (default is png),",
        "                supports emf, eps, pdf, png, ps, raw, rgba,",
        "                svg, svgz",
        "-o <outdir>     directory where output files are created",
        "                defaults to '.'",
        "E.g. usage:",
        "nadq_plot.py -d 'My 1PB disk' "
        "39_NADQ02_SAS_AZCS_DskQul_Perf_hdd-177.console",
        "Creates a set of charts, one for each stroking depth in test case",
    ]
    print("\n".join(usage_lines))
    sys.exit(exit_code)


def parse_cmdln_args():
    """
    Parse sys.argv into the glob_arg_* settings and open the input file.

    Returns the console output file opened for reading.  Exits through
    usage_exit() on -h (status 0), on an invalid option (2), on more than
    one input file (3), on a missing input file (0) and when the input
    file cannot be opened (4).
    """
    global glob_arg_app_name, glob_arg_disk_friendly_name, \
        glob_arg_run_note, glob_arg_output_fmt, \
        glob_arg_azcs_in_disk_iops, \
        glob_arg_output_dir

    glob_arg_app_name = os.path.basename(sys.argv[0])

    try:
        # Long options that take a value must end in '=', otherwise getopt
        # treats them as flags and their value becomes a positional arg.
        opts, args = getopt.getopt(
            sys.argv[1:], "hid:n:f:o:",
            ["help", "iops", "disk=", "note=", "fmt=", "outdir="])
    except getopt.GetoptError as err:
        print(str(err))
        usage_exit(2)

    for opt, optval in opts:
        if opt in ("-h", "--help"):       # help with usage
            usage_exit()
        elif opt in ("-i", "--iops"):     # convert disk to client iops
            glob_arg_azcs_in_disk_iops = True
        elif opt in ("-d", "--disk"):     # friendly name of disk
            glob_arg_disk_friendly_name = optval
        elif opt in ("-n", "--note"):     # a note for the run
            glob_arg_run_note = optval
        elif opt in ("-f", "--fmt"):      # output files format
            glob_arg_output_fmt = optval
        elif opt in ("-o", "--outdir"):   # output directory
            glob_arg_output_dir = optval
        else:
            # getopt only returns options from the lists above
            raise AssertionError("unhandled command line option")

    if len(args) > 1:
        print("%s: too many input files specified" % glob_arg_app_name)
        usage_exit(3)
    if len(args) == 0:
        print("%s: missing <console_file>" % glob_arg_app_name)
        # Exit status 0 is kept from the original behaviour.
        usage_exit(0)

    infh = None
    try:
        infh = open(args[0], "r")
    except IOError:
        print("%s: unable to open '%s'" % (glob_arg_app_name, args[0]))
        usage_exit(4)
    return infh


##
# CLASSES
#

class disk_qual_reader:
    """Parses a disk_qual console output file"""

    # Regular expressions for parsing console output file
    sysconfig_line = re.compile(r".*sysconfig")
    disk_qual_cmd_line = re.compile(r"^.*disk_qual (\w+)")
    CCT_dist_line = re.compile(r"^.*CCT distribution")
    prompt_line = re.compile(r"^.*\*>\s*$")
    result_hdr_line = re.compile(r"^.*disk,result")
    # Matches e.g. "[3d.00.7]: Ending script execution"
    ending_script_exec_line = re.compile(
        r"^.*\[[\w\.]+\]: Ending script execution")

    def __init__(self, infh):
        """infh is an iterator over the lines of the console output."""
        self.infh = infh
        # Test cases from 'disk_qual script' statements, waiting to be
        # paired (in order) with the result rows of a summary section.
        self.test_case_queue = deque()

    def parse_file(self):
        """
        Scan the whole input.

        The line following a 'sysconfig' line is stored as the kernel
        version, 'disk_qual script...' statements are queued as test cases
        and 'disk_qual summary' sections are parsed for test results.
        """
        global glob_kernel_version

        for line in self.infh:
            # check for kernel version
            if disk_qual_reader.sysconfig_line.match(line) is not None:
                # default guards against a log ending on the sysconfig line
                glob_kernel_version = next(self.infh, "").strip()
                continue

            sr = disk_qual_reader.disk_qual_cmd_line.match(line)
            if sr is None:
                continue

            dq_subcmd = sr.group(1)
            if dq_subcmd.startswith("script"):
                self.parse_script_stmt(line)
            elif dq_subcmd == "summary":
                self.parse_summary_section()

    def parse_script_stmt(self, line):
        """Queue the test case described by a 'disk_qual script' line."""
        test_case = disk_qual_test_case()
        test_case.parse_args(line.split())
        self.test_case_queue.append(test_case)

    def parse_summary_section(self):
        """Parse a section starting with 'disk_qual summary -d 3d.00.7'
        ending with [3d.00.7]: Ending script execution

        The section also ends at the next disk_qual command line or at the
        end of the input.  In every case the queue of pending test cases
        is cleared afterwards.
        """
        reshdr_list = []

        # Continue consuming the same iterator parse_file() is looping on.
        for raw_line in self.infh:
            line = raw_line.strip()

            # ignore blank lines
            if len(line) == 0:
                continue

            # ignore *> lines
            if disk_qual_reader.prompt_line.match(line) is not None:
                continue

            # ignore a CCT distribution line and next
            if disk_qual_reader.CCT_dist_line.match(line) is not None:
                next(self.infh, None)
                continue

            # next disk_qual command?
            # NOTE(review): the command line is consumed here and is not
            # handed back to parse_file(), as in the original code.
            if disk_qual_reader.disk_qual_cmd_line.match(line) is not None:
                break

            # [3d.00.7]: Ending script execution statement?
            if disk_qual_reader.ending_script_exec_line.match(line) \
                    is not None:
                break

            # test result headings statement? i.e.,
            # disk,result,seq,test,test_name...
            if disk_qual_reader.result_hdr_line.match(line) is not None:
                reshdr_list = line.split(',')
                continue

            # test result statement?
            # 3d.00.7,passed,0,3,RANDOM READ...
            resval_list = line.split(',')
            if len(resval_list) == len(reshdr_list):
                self.parse_test_result(reshdr_list, resval_list)
                continue

            if glob_debug:
                print(line)

        self.test_case_queue.clear()

    def parse_test_result(self, reshdr_list, resval_list):
        """Pair a result row with the oldest queued test case and record
        it on the disk named by that test case."""
        if not self.test_case_queue:
            # A result row without a preceding script statement cannot be
            # attributed to a test case; report and skip it.
            print("%s: NOTE: result without matching disk_qual script "
                  "statement ignored: %s"
                  % (glob_arg_app_name, ",".join(resval_list)))
            return

        # Retrieve the test case associated with this result
        test_case = self.test_case_queue.popleft()

        # Fetch the associated disk
        disk = disk_qual_disk.get_disk_by_id(test_case.disk_id)
        result = disk_qual_test_result(disk, test_case,
                                       reshdr_list, resval_list)
        disk.add_test_result(test_case, result)


class disk_qual_disk:
    """A disk under test together with its test cases and results."""

    # Registry of all disks: by id, and in order of creation
    disk_id_tbl = dict()
    disk_list = []

    @staticmethod
    def get_disk_by_id(disk_id):
        """Return the disk registered under disk_id, creating it if new."""
        disk = disk_qual_disk.disk_id_tbl.get(disk_id)
        if disk is None:
            disk = disk_qual_disk(disk_id)
        return disk

    @staticmethod
    def get_disk_list():
        """Return the list of all disks in order of creation."""
        return disk_qual_disk.disk_list

    def __init__(self, disk_id):
        """Create the disk and register it in the class-level tables."""
        assert disk_id not in disk_qual_disk.disk_id_tbl, \
            "disk id table programming error"
        assert self not in disk_qual_disk.disk_list, \
            "disk list programming error"

        self.disk_id = disk_id
        self.disk_product_id = None      # X number of the disk
        self.disk_vendor_id = None       # e.g., NETAPP
        self.disk_firmware_ver = None    # NA01
        self.disk_serial_num = None
        self.test_start_datetime = None
        self.test_end_datetime = None

        disk_qual_disk.disk_id_tbl[disk_id] = self
        disk_qual_disk.disk_list.append(self)

        self.disk_test_case_list = []    # disk_qual script data
        self.disk_test_result_list = []  # disk_qual summary data

        # Used to convert 10od => 41984, 21013496
        self.disk_sector_interval_dict = dict()

    def __str__(self):
        """'<vendor> <product> <firmware> <serial> (<disk id>)'; the
        details are only available after the first add_test_result()."""
        return "%s %s %s %s (%s)" % (
            self.disk_vendor_id, self.disk_product_id,
            self.disk_firmware_ver, self.disk_serial_num, self.disk_id)

    def str_for_pathname(self):
        """'<vendor>_<product>_<firmware>', used in output file names."""
        return "_".join([self.disk_vendor_id, self.disk_product_id,
                         self.disk_firmware_ver])

    def save_disk_details(self, result):
        """Copy vendor, product, firmware and serial number from result."""
        self.disk_vendor_id = result.get_disk_vendor_id()
        self.disk_product_id = result.get_disk_product_id()
        self.disk_firmware_ver = result.get_disk_firmware_ver()
        self.disk_serial_num = result.get_disk_serial_num()

    def save_sector_interval_conv(self, test_case, result):
        """Remember the first/last sector reported for the test case's
        sector interval; only the first result per interval is kept."""
        sector_interval = test_case.get_sector_interval()
        if sector_interval not in self.disk_sector_interval_dict:
            self.disk_sector_interval_dict[sector_interval] = \
                disk_qual_sector_range(sector_interval,
                                       result.get_first_sector(),
                                       result.get_last_sector())

    def save_test_start_datetime(self, result):
        """Record the start time of the first result only."""
        if self.test_start_datetime is None:
            self.test_start_datetime = result.get_test_start_datetime()

    def save_test_end_datetime(self, result):
        """Record the end time of the most recent result."""
        self.test_end_datetime = result.get_test_end_datetime()

    def add_test_result(self, test_case, result):
        """Record a test case and its result for this disk."""
        assert self.disk_id == result.get_disk_id()

        self.save_disk_details(result)
        self.save_sector_interval_conv(test_case, result)
        self.save_test_start_datetime(result)
        self.save_test_end_datetime(result)

        self.disk_test_case_list.append(test_case)
        self.disk_test_result_list.append(result)

    def get_test_result_list(self):
        return self.disk_test_result_list

    def get_test_case_list(self):
        return self.disk_test_case_list

    def get_sector_interval_list(self):
        """Distinct sector intervals of the results, in first-seen order."""
        sector_interval_list = []
        for result in self.disk_test_result_list:
            sector_interval = result.test_case.get_sector_interval()
            if sector_interval not in sector_interval_list:
                sector_interval_list.append(sector_interval)
        return sector_interval_list

    def get_sector_range(self, sector_interval):
        """Return the disk_qual_sector_range saved for sector_interval."""
        return self.disk_sector_interval_dict[sector_interval]

    def get_access_pattern_list(self, sector_interval):
        """Distinct access patterns tested on sector_interval, in
        first-seen order."""
        access_pattern_list = []
        for result in self.disk_test_result_list:
            test_case = result.test_case
            if test_case.get_sector_interval() != sector_interval:
                continue
            access_pattern = test_case.get_access_pattern()
            if access_pattern not in access_pattern_list:
                access_pattern_list.append(access_pattern)
        return access_pattern_list

    def get_qdepth_list(self, sector_interval):
        """Distinct queue depths tested on sector_interval, in first-seen
        order."""
        qdepth_list = []
        for result in self.disk_test_result_list:
            if result.test_case.get_sector_interval() != sector_interval:
                continue
            qdepth = result.get_client_qdepth()
            if qdepth not in qdepth_list:
                qdepth_list.append(qdepth)
        return qdepth_list

    def get_result(self, sector_interval, access_pattern, qdepth):
        """Return the first result matching all three keys, or None."""
        for result in self.disk_test_result_list:
            test_case = result.test_case
            if (test_case.get_sector_interval() == sector_interval
                    and test_case.get_access_pattern() == access_pattern
                    and result.get_client_qdepth() == qdepth):
                return result
        return None

    def get_iops_list(self, sector_interval, access_pattern, qdepth_list):
        """IOPS for each queue depth in qdepth_list; 0 where no result
        exists."""
        iops_list = []
        for qdepth in qdepth_list:
            result = self.get_result(sector_interval, access_pattern,
                                     qdepth)
            iops_list.append(0 if result is None else result.get_iops())
        return iops_list

    def get_avg_rsptime_list(self, sector_interval, access_pattern,
                             qdepth_list):
        """Average response time for each queue depth in qdepth_list; 0
        where no result exists."""
        avg_rsptime_list = []
        for qdepth in qdepth_list:
            result = self.get_result(sector_interval, access_pattern,
                                     qdepth)
            avg_rsptime_list.append(
                0 if result is None else result.get_avg_rsptime())
        return avg_rsptime_list

    def get_test_start_datetime(self):
        return self.test_start_datetime

    def get_test_end_datetime(self):
        return self.test_end_datetime


class disk_qual_test_case:
    """The options of one 'disk_qual script' statement."""

    zcs_read_opt = re.compile(r"^.*zcs_read")
    dummy_opt = re.compile(r"^.*dummy")

    def __init__(self):
        # Defaults used when the script statement omits an option.  Values
        # parsed from a statement are kept as the strings getopt returns.
        self.test_id = -1
        self.qdepth = 3
        self.num_io = -1
        self.zone_size = 0
        self.disk_id = ""
        self.test_args = ""
        self.pass_count = 0
        self.first_sector = "fs"
        self.last_sector = 0
        self.num_sectors = 8
        self.num_seq_sectors = 512
        self.num_rand_sectors = 8
        self.rand_seed = ""

    def is_azcs(self):
        """True when the -o test args contain 'zcs_read'."""
        return disk_qual_test_case.zcs_read_opt.match(self.test_args) \
            is not None

    def is_dummy(self):
        """True when the -o test args contain 'dummy'."""
        return disk_qual_test_case.dummy_opt.match(self.test_args) \
            is not None

    def is_bcs(self):
        """True when the test case is neither azcs nor dummy."""
        return not self.is_azcs() and not self.is_dummy()

    def __str__(self):
        if self.is_azcs():
            r = "ZR" + str(self.qdepth) + "_" + str(self.zone_size)
        elif self.is_dummy():
            r = "DR" + str(self.qdepth) + "_" + str(self.zone_size)
        else:
            r = "RR" + str(self.qdepth)
        r += "_" + str(self.first_sector)
        return r

    def parse_args(self, args):
        """
        Fill in the test case from the whitespace-split tokens of a
        'disk_qual script' line; the first two tokens are skipped.

        Raises ValueError when the statement has an unknown option.
        """
        try:
            opts, args = getopt.getopt(args[2:],
                                       "t:L:p:f:l:n:x:y:q:o:z:d:S:")
        except getopt.GetoptError as err:
            print(str(err))
            print(args)
            raise ValueError("unhandled disk_qual option")

        # option letter -> attribute it is stored in
        attr_for_opt = {
            '-t': 'test_id',           # test case id
            '-L': 'num_io',            # loop count
            '-p': 'pass_count',        # pass count
            '-f': 'first_sector',      # first sector
            '-l': 'last_sector',       # last sector
            '-n': 'num_sectors',       # num sectors/io
            '-x': 'num_seq_sectors',   # num seq sectors/io
            '-y': 'num_rand_sectors',  # num rand sectors/io
            '-q': 'qdepth',            # num outstanding i/o
            '-o': 'test_args',         # test spec args
            '-z': 'zone_size',         # zone size
            '-d': 'disk_id',           # disk id (eg 3d.00.7)
            '-S': 'rand_seed',         # Seed value
        }
        for opt, optval in opts:
            if opt not in attr_for_opt:
                raise ValueError("unhandled disk_qual option")
            setattr(self, attr_for_opt[opt], optval)

    def get_sector_interval(self):
        """'<first>_<last>' when the first sector is numeric, otherwise
        the first sector abbreviation itself (e.g. 10od, fs)."""
        if self.first_sector.isdigit():
            return str(self.first_sector) + "_" + str(self.last_sector)
        return self.first_sector

    def get_access_pattern(self):
        """'azcs<zone size>', 'dummy<zone size>' or 'bcs'."""
        if self.is_azcs():
            return "azcs" + str(self.zone_size)
        if self.is_dummy():
            return "dummy" + str(self.zone_size)
        return "bcs"

    def get_qdepth(self):
        """Queue depth as an int, so that the default (3) and values
        parsed from '-q' compare equal and plot on a numeric axis."""
        return int(self.qdepth)


class disk_qual_sector_range:
    """
    Used to convert a 10od value to a set of sector numbers, e.g.,
    41984, 21013496 based solely on disk_qual script and
    disk_qual summary messages in the console output.
    """

    def __init__(self, abbr, first_sector, last_sector):
        self.abbr = abbr  # e.g. 10od, 100od, 500od, 100md, 100id, fs
        self.first_sector = int(first_sector)
        self.last_sector = int(last_sector)


# disk,result,seq,test,test_name,first,end,block,que,ave,min,max,ave_stop,min_stop,max_stop,t_msecs,t_ops,iops,rate,s_err,h_err,p_err,d_err,seek_t,Glist_s,Glist_e,time_s,time_e,prod,vendor,fw,sn
# 3d.00.7,passed,0,3,RANDOM READ,41984,21013496,8,1,6,0,31,0,-1,0,24173,4000,165,0.6,0,0,0,0,implied,0,0,2011/3/30-23:1:30,2011/3/30-23:1:54,X308_HMARK03TSSA,NETAPP ,NQ0B,YHG2LJ1A
class disk_qual_test_result:
    """
    Captures a disk_qual summary results line
    """

    def __init__(self, disk, test_case, reshdr_list, resval_list):
        """Build the column-name -> value table from a header row and a
        result row; values are stripped of surrounding whitespace."""
        self.disk = disk
        self.test_case = test_case
        self.result_tbl = dict()
        self.client_qdepth = 0

        for key, val in zip(reshdr_list, resval_list):
            self.result_tbl[key] = val.strip()

        if glob_debug:
            print(reshdr_list)
            print(resval_list)

        if self.get("result") != "passed":
            print("%s: NOTE: result status not passed '%s'" % (
                glob_arg_app_name, self.get("result")))
            print(self)

    def __str__(self):
        r = str(self.test_case)
        r += "_r" + self.get("result")
        r += "_d" + self.get("disk")
        r += "_f" + self.get("first")
        r += "_e" + self.get("end")
        r += "_q" + self.get("que")
        r += "_i" + self.get("iops")
        return r

    def get(self, key):
        """Return the raw (string) value of column key."""
        return self.result_tbl[key]

    def get_client_qdepth(self):
        # NOTE: the value reported by disk_qual in the 'que' field
        # has changed from the disk qdepth to the client qdepth, so
        # the value given by '-q <qdepth>' of the disk_qual script
        # line is used in its place.
        return self.test_case.get_qdepth()

    def get_iops(self):
        """Return the 'iops' column as an int, halved for AZCS test cases
        when -i was given."""
        iops = int(self.result_tbl["iops"])

        # disk_qual reports 'iops' from the perspective of the disk.
        # With AZCS proximal I/Os the client only sees 1/2 the iops
        # from the disk due to pair of reads for a single data block,
        # so for azcs test cases iops is halved.
        if glob_arg_azcs_in_disk_iops and self.test_case.is_azcs():
            iops //= 2  # integer halving on both Python 2 and 3
        return iops

    def get_avg_rsptime(self):
        return float(self.result_tbl["ave"])

    def get_disk_id(self):
        return str(self.result_tbl["disk"])

    def get_disk_product_id(self):
        return str(self.result_tbl["prod"])

    def get_disk_vendor_id(self):
        return str(self.result_tbl["vendor"])

    def get_disk_firmware_ver(self):
        return str(self.result_tbl["fw"])

    def get_disk_serial_num(self):
        return str(self.result_tbl["sn"])

    def get_first_sector(self):
        return int(self.result_tbl["first"])

    def get_last_sector(self):
        return int(self.result_tbl["end"])

    def get_test_start_datetime(self):
        return self.result_tbl["time_s"]

    def get_test_end_datetime(self):
        return self.result_tbl["time_e"]


# Generates the output plots
class plot_file_writer:
    """Writes one chart file per disk and sector interval."""

    def __init__(self):
        # State of the chart currently being plotted
        self.disk = None
        self.sector_interval = None
        self.access_pattern = None
        self.access_pattern_list = []
        self.qdepth_list = []

    def calc_pctdiff(self, new, old):
        """
        Calculate the percentage difference between old and new relative
        to their average, clamped to [-100.0, 100.0].  Returns 100.0 when
        the average is zero.

        For example:
            new=100, old=50 results in 66.6%
            new=20, old=100 results in -100.0% (-133.3% clamped)
        """
        nval = float(new)
        oval = float(old)
        avg = (nval + oval) / 2
        if avg == 0:
            return 100.0
        res = (nval - oval) / avg * 100.0
        return max(-100.0, min(100.0, res))

    def calc_iops_pctdiff(self, access_pattern_a, access_pattern_b,
                          qdepth_list):
        """
        Calculates the iops percentage difference between access
        pattern _a_ and access pattern _b_, for all qdepth values in
        qdepth_list.  The resulting percentages are returned in a list.
        """
        pata_iops_list = self.disk.get_iops_list(
            self.sector_interval, access_pattern_a, qdepth_list)
        patb_iops_list = self.disk.get_iops_list(
            self.sector_interval, access_pattern_b, qdepth_list)
        return [self.calc_pctdiff(iops_a, iops_b)
                for iops_a, iops_b in zip(pata_iops_list, patb_iops_list)]

    def make_title(self):
        """Build the multi-line chart title for the current disk and
        sector interval."""
        sector_range = self.disk.get_sector_range(self.sector_interval)

        title = ""
        if glob_arg_disk_friendly_name is not None:
            title += glob_arg_disk_friendly_name + " - "
        title += str(self.disk) + "\n"
        title += "Sector interval [%d, %d] - %s\n" % (
            sector_range.first_sector, sector_range.last_sector,
            self.sector_interval)
        title += "Test Start: %s, End: %s" % (
            self.disk.get_test_start_datetime(),
            self.disk.get_test_end_datetime())
        if glob_kernel_version is not None:
            title += "\n" + glob_kernel_version
        if glob_arg_run_note is not None:
            title += "\n" + glob_arg_run_note
        return title

    def setup_chart(self):
        """
        Performs setup steps for plotting a chart.
        """
        #plt.rcParams['font.size'] = '11'
        plt.figure(figsize=(9, 6))
        plt.subplots_adjust(top=0.86, hspace=0.4)
        plt.gcf().set_size_inches(8, 10)
        plt.suptitle(self.make_title(), fontsize="medium")  # , y=0.99)

    def plot_figure_iops_vs_qdepth(self):
        """
        Plots the iops versus qdepth figure
        """
        plt.ylabel('IOPS')
        plt.xlabel('Queue Depth')

        for access_pattern in self.access_pattern_list:
            iops_list = self.disk.get_iops_list(
                self.sector_interval, access_pattern, self.qdepth_list)
            print(access_pattern + " iops: " + str(iops_list))

            # Draw the line
            plt.plot(self.qdepth_list, iops_list, "x-",
                     label=access_pattern)

            # Label the data points
            for qdepth, iops in zip(self.qdepth_list, iops_list):
                plt.text(qdepth, iops, '%d' % int(iops), size='x-small')

    def plot_figure_rsptime_vs_iops(self):
        """
        Plots the response time versus iops figure
        """
        plt.ylabel('Avg Rsp Time/IO (ms)')
        plt.xlabel('IOPS')

        for access_pattern in self.access_pattern_list:
            avg_rsptime_list = self.disk.get_avg_rsptime_list(
                self.sector_interval, access_pattern, self.qdepth_list)
            iops_list = self.disk.get_iops_list(
                self.sector_interval, access_pattern, self.qdepth_list)
            print("%s avg_rsptime:  %s" % (
                access_pattern, ["%0.1f" % i for i in avg_rsptime_list]))

            plt.plot(iops_list, avg_rsptime_list, "+-",
                     label=access_pattern)
            for iops, rspt in zip(iops_list, avg_rsptime_list):
                plt.text(iops, rspt, '%.1f' % rspt, size='x-small')

    def plot_figure_iops_pctdiff_vs_qdepth(self):
        """
        Plots the iops percentage difference versus qdepth figure
        for the azcs32/dummy32 and azcs64/dummy64 access pattern pairs
        """
        plt.ylabel('IOPS Pct Diff')
        plt.xlabel('Queue Depth')

        for pattern_a, pattern_b in (("azcs32", "dummy32"),
                                     ("azcs64", "dummy64")):
            label = "%s/%s" % (pattern_a, pattern_b)
            pct_list = self.calc_iops_pctdiff(pattern_a, pattern_b,
                                              self.qdepth_list)
            print("pctdiff(%s):  %s" % (
                label, ["%0.1f" % i for i in pct_list]))

            plt.plot(self.qdepth_list, pct_list, ".-", label=label)
            for qdepth, pct in zip(self.qdepth_list, pct_list):
                plt.text(qdepth, pct, '%.1f' % pct, size='x-small')

    def set_legend(self):
        """
        Applies a legend to a previously drawn figure
        """
        plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
                   ncol=5, mode="expand", borderaxespad=0.,
                   columnspacing=0.1, labelspacing=0.1,
                   handletextpad=0.1)
        leg = plt.gca().get_legend()
        leg.draw_frame(False)
        ltext = leg.get_texts()
        llines = leg.get_lines()
        plt.setp(ltext, fontsize='x-small')
        plt.setp(llines, linewidth=1.5)

    def write_footnotes(self, footnote=""):
        """Write footnote text into the current axes and hide the axes
        themselves."""
        ax = plt.gca()
        ax.text(0, 0, footnote, fontsize="small")
        ax.set_axis_off()

    def plot_chart_for_sector_interval(self, sector_interval):
        """
        Plots all the figures for a single sector interval chart and
        saves it as
        <output dir>/<vendor>_<product>_<firmware>_<interval>.<format>
        """
        self.sector_interval = sector_interval
        self.access_pattern_list = self.disk.get_access_pattern_list(
            sector_interval)
        self.qdepth_list = self.disk.get_qdepth_list(sector_interval)
        print("qdepth: " + str(self.qdepth_list))

        self.setup_chart()

        plt.subplot(311)  # nrows, ncols, nfig
        plt.grid()
        self.plot_figure_iops_vs_qdepth()
        self.set_legend()

        plt.subplot(312)
        plt.grid()
        self.plot_figure_rsptime_vs_iops()
        self.set_legend()

        plt.subplot(313)
        plt.grid()
        self.plot_figure_iops_pctdiff_vs_qdepth()
        self.set_legend()

        chart_name = os.path.join(
            glob_arg_output_dir,
            "%s_%s.%s" % (self.disk.str_for_pathname(), sector_interval,
                          glob_arg_output_fmt))
        plt.savefig(chart_name)
        # Release the figure; otherwise every chart stays in memory.
        plt.close()

    def plot_charts_for_disk(self, disk):
        """
        Plots all the figures for a single disk
        """
        self.disk = disk
        sector_interval_list = disk.get_sector_interval_list()

        print("%s: found %d results for disk %s" % (
            glob_arg_app_name, len(disk.disk_test_result_list), disk))

        if glob_debug:
            for test_case in disk.get_test_case_list():
                print(test_case)

        for sector_interval in sector_interval_list:
            print(sector_interval)
            self.plot_chart_for_sector_interval(sector_interval)

    def plot(self):
        """
        For each disk in the disk list, plot all charts

        Raises ImportError when matplotlib is not installed.
        """
        if plt is None:
            raise ImportError("matplotlib is required to plot charts")

        for disk in disk_qual_disk.get_disk_list():
            print(disk)
            self.plot_charts_for_disk(disk)


##
# MAIN
#

def main():
    """Parse the command line, read the console output, write charts."""
    infh = parse_cmdln_args()
    try:
        if plt is None:
            print("%s: matplotlib is required to plot charts"
                  % glob_arg_app_name)
            sys.exit(5)
        reader = disk_qual_reader(infh)
        writer = plot_file_writer()
        reader.parse_file()
    finally:
        infh.close()
    writer.plot()


if __name__ == "__main__":
    main()