## @steps
## The test will execute steps mentioned below:
## 1. Set privilege level to Test
## 2. Record all Phy Change Counts for all drives present
## 3. Record power cycle count and power cycle failures for each drive
## 4. Start 2 threads of hammer
## 5. Power cycle one drive in the filesystem that hammer is running on
## 6. Issue these commands once every 5 mins
## 7. Dwell time between power cycles
## 8. Repeat steps 7-8 for 12 hours
## 9. Stop hammer execution
## 10. Stop any disk scrub that might be running
## 11. Record all Phy Change Counts for all drives present
## 12. Record power cycle count and power cycle failures for each drive
## 13. pull BSAS link speed logs
##
## @status Automated
## @author sags@netapp.com
## @burt 1221707
##
#############################################################################
#############################################################################
########################################
### Library functions
##########################################
use strict;
use File::Copy;
use Storage::NVMe_Common_Lib;
use Storage::Tahiti_Common_Lib;
use Storage::Common_Lib;
use Storage::SASUtils qw(disable_dbg_msgs);
use Storage::Sasadmin qw(sasadmin_dev_power_cycle);

#########################################
### Initialization/declaration
#########################################
use vars qw(
  $FILER
  $TEST_SETUP
  $MODE
  $LOGDIR
  $EOE
  $TEST_WAIT_TIME
  $MAIL_TO
  $MAIL_FROM
  $EMAIL
  $POW_SLP_TIME
  $FIPS_SELECTED
);

# Test parameters, read from the global NATE parameter set with fallbacks.
my $params = NATE::ParamSet->new( global => 1 );
$LOGDIR         = $params->get( 'LOGDIR',         default => undef );
$EOE            = $params->get( 'EOE',            default => 'default' );
$EMAIL          = $params->get( 'EMAIL',          default => 'y' );
$MAIL_TO        = $params->get( 'MAIL_TO',        default => 'Email to' );
$MAIL_FROM      = $params->get( 'MAIL_FROM',      default => 'Email from' );
$FILER          = $params->get( 'FILER',          default => 'Filer' );
$TEST_SETUP     = $params->get( 'TEST_SETUP',     default => 'SAS' );
$TEST_WAIT_TIME = $params->get( 'TEST_WAIT_TIME', default => '3' );
$MODE           = $params->get( 'MODE',           default => 'normal' );
$POW_SLP_TIME   = $params->get( 'POW_SLP_TIME',   default => '120' );
$FIPS_SELECTED  = $params->get( 'FIPS_SELECTED',  default => 'NO' );

#Common variable declaration
my ( $email_subject, $email_body );
my @Nodes;
my $filer_names;

# Node name => node object; used by init() to de-duplicate nodes by name.
# (Must be initialised with an empty list: assigning {} to a hash would
# create a bogus "HASH(0x...)" key.)
my %nodes_filer = ();

#Set test fail parameter to 0
my $test_status = 0;
my $Mode;
my $TC_name;
my $pre_post_flag = 0;
$TC_name = "325_NADQ02_NVMe_Th_Psm_Reset";

# Duration (seconds) of the reset loop in Psm_Reset.
# NOTE(review): $TEST_WAIT_TIME is read and logged but never used to derive
# this value - the duration is always 6 hours. Confirm whether that is intended.
my $test_wait_time = 21600;    #6hrs
logcomment("Default wait time is 6 hrs");

#########################################
## Pre-test processes
#########################################
my @Testcases = ( Thiti_PwrCyc => "Th_Psm_Reset" );

main();

########## MAIN ###################################################
# Creates the Test Case Driver object and runs the registered test
# case(s) through it. When the driver cannot be instantiated a failure
# mail is sent via send_message(). Returns $TCD::FAIL on error; on
# success the process exits with status 0.
####################################################################
sub main {

    # Debug break point
    $DB::single = 2;

    # Create Test Case Driver object
    $Test = TCD->new( -testcases => [@Testcases] );
    if ( $Test->error ) {
        $Test->nlog( $Test->errmsg );
        $test_status   = 1;
        $email_subject = "$TC_name : Test FAILED: FAIL";
        $email_body    = "Failed to instantiate Test object.\nLog Location : $LOGDIR\n";
        send_message(
            mail_subject => $email_subject,
            mail_body    => $email_body,
            MAIL_FROM    => $MAIL_FROM,
            MAIL_TO      => $MAIL_TO,
            EOE          => $EOE,
            EMAIL        => $EMAIL
        );
        return $TCD::FAIL;
    }

    # Performs method callbacks
    $Test->run_test();
    if ( $Test->error ) {
        $Test->nlog( $Test->errmsg );
        return $TCD::FAIL;
    }
    exit(0);
}    ## end sub main

########## INIT ###################################################
# This init subroutine will initialise the filer.
# Populates the file-level @Nodes (unique by node name), $Mode and
# $filer_names, then logs the start banner. Returns $TCD::PASS.
####################################################################
sub init() {
    $Test->description("Initialising all required variables and FILER connections ");
    $filer_names = $Test->get_param("FILER");    # Capturing Filer names from the param(Test_Suite)

    ##Check for duplicate node object and push unique node object in Nodes array.
    my @temp_nodes = NACL::C::Node->find();      # Find Nodes/Filers used in the test, Based on FILER param.
    foreach my $Node (@temp_nodes) {

        # Keyed by name, so a later object with the same name replaces an earlier one.
        $nodes_filer{ $Node->name() } = $Node;
    }
    @Nodes = values %nodes_filer;                # Contains Node objects used for test execution

    foreach my $Node (@Nodes) {
        logcomment( "Filers passed to the ntest are " . $Node->name() );
    }

    $Mode = $Nodes[0]->mode();
    version_test( node_present => \@Nodes, tc_name => $TC_name );
    logcomment("Checking for execution mode");
    $Test->nlog( "Filer - $filer_names : $TC_name : started, expected max completion time 12 hours : " . scalar( localtime() ) );
    $Test->nlog( "Filer - $filer_names : Log file for this test case: \n $LOGDIR/$TC_name" . ".log " . scalar( localtime() ) );
    logcomment("The user entered power cycle sleep time as $POW_SLP_TIME");
    return $TCD::PASS;
}

########## SETUP ###################################################
# Setup automatically called before executing tests
# Runs the NVMe pre-test process on all nodes and then, per node,
# issues "disk scrub stop" and a hammer abort on the console.
# Returns $TCD::PASS.
#####################################################################
sub setup() {
    $Test->description("Setup the environment for the test exectuion ");
    $Test->nlog("Mode of filer $filer_names : $Mode");
    my $node_ref = \@Nodes;
    version_test( node_present => $node_ref, tc_name => $TC_name );

    #####################################################################
    # Pre test proces : call for pre_n_post test process
    #####################################################################
    nvme_pre_test(
        node_present    => $node_ref,
        Test            => $Test,
        change_state_to => "CLI",
        filer_mode      => $Mode,
        Tahiti          => "yes"
    );

    ###########################################################################################
    #Stop the scrub process if already started
    foreach my $Node (@Nodes) {
        $Test->description("Ensuring that the scrub and hammer process is stopped before the script starts execution");
        my $Api_Set_Obj = $Node->get_7m_or_nodescope_apiset( connid => 'console' );
        $Api_Set_Obj->execute_command( 'command' => "disk scrub stop" );
        sleep 5;
        $Api_Set_Obj->hammer_abort();
        sleep 5;
    }
    return $TCD::PASS;
}

########## TEST 1 ###################################################
#NADQ02_Th_Psm_Reset
# Test-case entry point. Detects a partitioned system from the disk
# names returned by disk_show_v() (a name ending in P<digits>) and, if
# partitioned, narrows the file-level @Nodes to the first node only.
# Psm_Reset is then launched as a background subtest per node and
# waited on before the next node starts.
# Returns $TCD::PASS when status_return() reports 0, else $TCD::FAIL.
#####################################################################
sub Thiti_PwrCyc {
    my @subtests;
    my $partition = 0;
    my $return;
    logcomment( "Number of nodes are " . scalar @Nodes );
    logcomment("Checking whether the system is Partitioned or Unpartitioned");

    my $disk_hash = disk_show_v( Node => $Nodes[0] );
    foreach my $disk_name ( keys %{$disk_hash} ) {
        if ( $disk_name =~ /P\d+$/ ) {
            $partition = 1;
            last;
        }
    }

    if ( $partition == 1 ) {
        my $Filer = $Nodes[0]->name();
        logcomment("SYSTEM is Partitioned, Test will run only on single node i.e., on $Filer node");
        @Nodes = ( $Nodes[0] );
    }

    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        logcomment("Test started on $FILER_C");
        push( @subtests, subtest( \&Psm_Reset, -runid, "PwrCyc_$FILER_C", -bg, "--", $Node ) );

        # Waiting inside the loop makes the per-node subtests run one after another.
        Subtest::wait_finish( subtest => [@subtests] );
        $return = status_return( subtest_ref => [@subtests] );
        logcomment("Test status is $return");
        logcomment("Test Completed on $FILER_C");
    }

    logcomment("Test status is $return");
    if ( $return == 0 ) {
        return $TCD::PASS;
    }
    else {
        return $TCD::FAIL;
    }
}

#######################
##Subtest_create
# Psm_Reset( $Node )
#
# Subtest body executed for a single node:
#   STEP 1-3 : set privilege, list owned drives, recreate non-root
#              aggregates and start filersio on the spare drives
#   STEP 4   : set scrub priority
#   STEP 5-8 : issue a psmadmin reset per drive, dwell $POW_SLP_TIME
#              seconds after each, repeat until $test_wait_time elapses
#   STEP 9-11: stop IO, stop scrub, pull BSAS link speed logs
#   finally  : delete the non-root aggregates again
#
# Returns the value of logresult(); fatal conditions return early with
# logresult( "INFO", msg => 1 ).
#######################
sub Psm_Reset {
    my @Nodes;
    push( @Nodes, shift(@_) );

    ###########################
    ##                       ##
    ##    Test variables     ##
    ##                       ##
    ###########################
    my $FILER_C;
    my $non_rt_status;
    my $FILER_SIO = 20;
    my @non_rt_agg;
    my @list_of_disk;
    my @disk_list;
    my @loc_all_threads;
    my $sto_show_data        = {};
    my %power_cyc            = ();
    my @list_of_disk_2_pwrcy = ();

    $Test->description("Power Cycle test");

    ##############################################################################################
    ##                                                                                          ##
    ##STEP 1 - Set privilege level to Test                                                      ##
    ##                                                                                          ##
    ##############################################################################################
    foreach my $Node (@Nodes) {
        $FILER_C = $Node->name();
        tahiti_eye_catcher( Test => $Test, string => "STEP 1 of 11 : $FILER_C : Set privilege level to Test" );
        my $Api_Set_Obj     = $Node->get_7m_or_nodescope_apiset( connid => "console" );
        my $prompts_answers = [ ".*Do you want to continue.*" => 'y' ];
        $Api_Set_Obj->execute_raw_command( 'command' => "set test", 'connectrec-match_table' => $prompts_answers );

        ##############################################################################################
        ##                                                                                          ##
        ##STEP 2 - Record all Phy Change Counts for all drives present                              ##
        ##                                                                                          ##
        ##############################################################################################
        tahiti_eye_catcher( Test => $Test, string => "STEP 2 of 11 : $FILER_C : Record all Phy Change Counts for all drives present" );
        @list_of_disk = tahiti_get_disk_owned( Node => $Node );
        my $owned_dsks = _print_array( \@list_of_disk );
        logcomment( "Filer : $FILER_C : Total number of disk count present in the filer is " . scalar @list_of_disk . " and the drives are:\n" . $owned_dsks ) if (@list_of_disk);
        @disk_list = @list_of_disk;

        ##############################################################################################
        ##                                                                                          ##
        ##STEP 3 - Check for non-root aggregate, If already present delete and create new aggregate ##
        ##                                                                                          ##
        ##############################################################################################
        tahiti_eye_catcher( Test => $Test, string => "STEP 3 of 11 : $FILER_C : Check for non-root aggregate, If already present delete and create new aggregate" );
        $Api_Set_Obj->set_timeout( "connectrec-timeout" => 72000000 );

        # "\013" is sent as a raw console command here and after most long
        # operations - presumably to resynchronise the prompt; TODO confirm.
        $Api_Set_Obj->execute_raw_command( 'command' => "\013" );
        $Api_Set_Obj->execute_raw_command( 'command' => "\013" );
        logcomment("Filer : $FILER_C : Check for non-root aggregate");
        ( $non_rt_status, @non_rt_agg ) = nvme_check_non_root_aggr( Node => $Node );

        if (@non_rt_agg) {
            logcomment( "Filer : $FILER_C : Total non-root aggregate(s) present in filer is" . scalar @non_rt_agg . " and Non -root aggregate are: @non_rt_agg" );
            logcomment("Filer : $FILER_C : Deleting Existing aggregate....");
            vserver_delete( Node => $Node );    #Vserver deletion
            aggregate_volume_delete( Node => $Node );
            $Api_Set_Obj->execute_raw_command( command => "run local disk zero spares" );
            sleep 20;
            $Api_Set_Obj->execute_raw_command( command => "\013" );
        }
        logcomment("Filer : $FILER_C : Non-root aggregate cannot be found");
        sleep 30;

        my $spare_list = tahiti_sto_agg_show_spare_dsk( Node => $Node );
        my @spare_drvs = keys %$spare_list;
        my $spare_dsks = _print_array( \@spare_drvs );
        logcomment( "Filer : $FILER_C : Total spare count is : " . scalar @spare_drvs . " and Spare Drive are :\n" . $spare_dsks );
        logcomment("DEBUG :Filersio threads : $FILER_SIO ");
        sleep 30;
        $Api_Set_Obj->execute_raw_command( 'command' => "\013" );

        my $return_filersio_thrds = nvme_multi_aggr_with_rd_wr_vol_n_fsio(
            API_con  => $Api_Set_Obj,
            handle   => "custom",
            filer    => $FILER_C,
            fsio     => $FILER_SIO,
            loc_drvs => \@spare_drvs,
            Node     => $Node,
        );

        # Collect the SIO commands of every returned entry (previously each
        # entry overwrote the list, so only the last one was logged).
        foreach my $fil ( keys %$return_filersio_thrds ) {
            my $sio_th = $return_filersio_thrds->{$fil}->{'SIO_CMD'};
            push( @loc_all_threads, @{ $sio_th || [] } );
        }
        logcomment("Filer : $FILER_C : Filer sio Started ");
        logcomment("Filer : $FILER_C : @loc_all_threads");
        logcomment("Filer : $FILER_C : completed");
        sleep 30;
        $Api_Set_Obj->execute_raw_command( 'command' => "\013" );

        ## Get drives where IO are running.
        logcomment("Filer : $FILER_C : Checking for non-root aggregate(s) created by using Tahiti Drives");
        ( $non_rt_status, @non_rt_agg ) = nvme_check_non_root_aggr( Node => $Node );
        logcomment("Filer : $FILER_C : Get Tahiti drives where IO are running");
        logcomment("Filer : $FILER_C : Get Tahiti Drives of all non-root aggregates : @non_rt_agg");
        foreach my $agr (@non_rt_agg) {
            logcomment("Get drives of $agr");
            $Api_Set_Obj->set_timeout( 'connectrec-timeout' => 7200 );
            my $after = $Api_Set_Obj->execute_command( 'command' => "aggr status -r $agr" );
            foreach my $line ( split( /\n/, $after ) ) {
                next if ( $line =~ /parity uninit/ );
                if ( $line =~ /(data|dparity|parity)\s*(\S+)/ ) {
                    my $disk = $2;

                    # Strip a trailing partition suffix (P<n> / L<n>) to get the drive name.
                    $disk =~ s/(P|L)\d+$//g;
                    push( @list_of_disk_2_pwrcy, $disk );
                }
            }
        }
        my $io_dsks = _print_array( \@list_of_disk_2_pwrcy );
        logcomment( "The drives mentioned below will be powercycled , where IO are running:\n" . $io_dsks );

        ##############################################################################################
        ##                                                                                          ##
        ##STEP 4 - Set scrub priority level                                                         ##
        ##                                                                                          ##
        ##############################################################################################
        tahiti_eye_catcher( Test => $Test, string => "STEP 4 of 11 : $FILER_C : Set scrub priority level" );
        $Api_Set_Obj->execute_raw_command( 'command' => "storage raid-options modify -node $FILER_C -name raid.scrub.perf_impact -value medium" );

        ##################################################################################################
        ##                                                                                              ##
        ##STEP 5 - Power cycle each drive in succession in the file-system that hammer is running on    ##
        ##                                                                                              ##
        ##################################################################################################
        tahiti_eye_catcher( Test => $Test, string => "STEP 5 of 11 : $FILER_C :Power cycle each drive in succession in the filesystem that hammer is running on" );
        logcomment( "Filer : $FILER_C : The total number of drives which will be Power cycling is: " . scalar(@list_of_disk_2_pwrcy) . " on which IO is running. And drives are: @list_of_disk_2_pwrcy" );
        if ( scalar(@list_of_disk_2_pwrcy) == 0 ) {
            logcomment("**FATAL**: $FILER_C Test case will not be proceed because NON ROOT VOLUMES NOT EXIST OR HAMMER IS NOT RUNNING ON THE DISKS - STEP : 3");
            return logresult( "INFO", msg => 1 );
        }
        logcomment( "Filer : $FILER_C : Power cycling started at " . scalar( localtime() ) );

        # Dwell time split into minutes/seconds for logging; loop invariant.
        my ( $dwell_sec, $dwell_min ) = gmtime($POW_SLP_TIME);
        logcomment("Filer : $FILER_C : Alotted Sleep Time : $POW_SLP_TIME seconds (i.e) $dwell_min MINS $dwell_sec SECS\n");

        my $loop         = 1;
        my $now          = time;
        my $end_time_sec = $now + $test_wait_time;
        while ( $now < $end_time_sec ) {
            tahiti_eye_catcher( Test => $Test, string => "STEP 5($loop) of 11 : $FILER_C :Power cycle each drive in succession in the filesystem that hammer is running on" ) if ( $loop > 1 );
            my $first            = 1;    # 1 until the first drive of this pass has been reset
            my $cycled_this_pass = 0;    # number of drives reset in this pass

            #FIPS CHECK
            if ( $FIPS_SELECTED =~ /yes/i ) {
                logcomment("Filer : $FILER_C : FIPS option is been selected, checking for SED drive and data key lock state");
                my $ret = check_encry_okm_lock_state( Node => $Node );
                if ( $ret == 1 ) {
                    logcomment("**WARNING**: Filer - $FILER_C - SED drives cannot be re-locked or modify");
                }
            }

            if ( $loop == 1 ) {
                tahiti_eye_catcher( Test => $Test, string => "STEP 6 of 11 : $FILER_C :Repeat steps 5-8 for 12 hours" );
                my ( $sec, $min, $hrs ) = gmtime($test_wait_time);
                logcomment("User selected -w = $TEST_WAIT_TIME so the looping time will be $hrs Hours $min Minutes and $sec Seconds");
                logcomment( "Start time for looping is " . scalar( localtime() ) );
            }
            else {
                my $time_left = $end_time_sec - $now;
                if ( $time_left > 0 ) {
                    my ( $lsec, $lmin, $lhrs ) = gmtime($time_left);
                    logcomment("Remaining time for looping is $lhrs Hours $lmin Minutes and $lsec Seconds");
                }
            }

            # NOTE(review): this iterates every owned drive (@list_of_disk), while
            # the messages above announce @list_of_disk_2_pwrcy (drives with IO).
            # Kept as in the original; confirm which list is intended.
            foreach my $sel_drive (@list_of_disk) {
                next if ( $sel_drive =~ /P1|P2|P3/ );

                # Shelf and port are derived per drive so that a name which does
                # not parse can never inherit the previous drive's values.
                # NOTE(review): the "+ 6" offset presumably maps the last name
                # component to a PSM port number - confirm.
                my ( $shelf, $port );
                if ( $sel_drive =~ /\S+\.\S+\.\S+\.(\S+)/ ) {
                    $port = $1 + 6;
                }
                if ( $sel_drive =~ /(\S+\.(\S+)\.\S+\.\S+)/ ) {
                    $shelf = $2;
                }
                unless ( defined($shelf) && defined($port) ) {
                    logcomment("**WARNING**: Filer - $FILER_C - Unable to derive shelf and port from drive name $sel_drive , skipping psmadmin reset on this drive");
                    next;
                }
                logcomment("Filer : $FILER_C : The port of the drive : $sel_drive is : $port");
                logcomment("Filer : $FILER_C : The shelf of the drive : $sel_drive is : $shelf");

                my $API_obj = $Node->get_7m_or_nodescope_apiset( connid => "console" );
                sleep 2;
                logcomment("$FILER_C : Issuing psmadmin_reset on drive $sel_drive ");
                my $psmadmin_out = $API_obj->execute_command( 'command' => "psmadmin inband_cli 0x.$shelf 'psmmgr_tp 4 $port 3000'", 'timeout' => 600000 );
                if ( $psmadmin_out =~ /Executed/ ) {
                    logcomment("Executed command psmadmin successfully on $sel_drive");
                }
                else {
                    logcomment("**FATAL** : $FILER_C : FAILED to execute psmadmin command on drive $sel_drive");
                    return logresult( "INFO", msg => 1 );
                }
                $power_cyc{$sel_drive}++;
                $cycled_this_pass++;

                tahiti_eye_catcher( Test => $Test, string => "STEP 7 of 11 : $FILER_C :Dwell time between power cycles " )        if ( ( $first == 1 ) && ( $loop == 1 ) );
                tahiti_eye_catcher( Test => $Test, string => "STEP 8($loop) of 11 : $FILER_C :Dwell time between power cycles " ) if ( ( $first == 1 ) && ( $loop > 1 ) );
                logcomment("Filer : $FILER_C : Sleeping for $POW_SLP_TIME seconds (i.e) $dwell_min MINS $dwell_sec SECS\n");
                sleep($POW_SLP_TIME);

                $API_obj->set_timeout( "connectrec-timeout" => 18000 );
                $API_obj->execute_raw_command( 'command' => "\013" );
                $API_obj->execute_raw_command( 'command' => "\013" );
                $API_obj->execute_raw_command( 'command' => "\013" );
                $first++;
            }    ## end of foreach $sel_drive (@list_of_disk)

            # The dwell sleep only happens per reset drive; with no eligible
            # drive the while loop would spin without pause until the deadline.
            if ( $cycled_this_pass == 0 ) {
                logcomment("**FATAL**: $FILER_C : No drive was eligible for psmadmin reset in loop $loop , aborting the power cycle loop");
                return logresult( "INFO", msg => 1 );
            }

            $now = time;
            $loop++;
        }    ## end of while ($now < $end_time_sec)

        logcomment( "End time for looping is " . scalar( localtime() ) );
        logcomment( "Power cycling on all the drives completed on filer $FILER_C " . scalar( localtime() ) );
        logcomment("The Power Cycle count details after completion of looping is : ");
        foreach my $drive ( sort keys %power_cyc ) {
            logcomment("Drive : $drive \t Power Cycled : $power_cyc{$drive} times\n");
        }
    }

    ##################################################################################################
    ##                                                                                              ##
    ##STEP 9 - Stop all IO threads                                                                  ##
    ##                                                                                              ##
    ##################################################################################################
    foreach my $Node (@Nodes) {
        my $API_obj = $Node->get_7m_or_nodescope_apiset();
        my $FILER_C = $Node->name();
        tahiti_eye_catcher( Test => $Test, string => "STEP 9 of 11 : $FILER_C :Stop all IO threads" );

        # Best effort: a ResponseException is logged and the test continues.
        try {
            logcomment("$FILER_C : Aborting IO");
            $API_obj->execute_command( 'command' => "filersio stop" );
        }
        catch NACL::APISet::Exceptions::ResponseException with {
            my $exception_object = shift;
            my $resp             = $exception_object->get_response_object();
            $Test->nlog( "Caught a " . ref($exception_object) . "!" );
            $Test->nlog( "Error is: " . $exception_object->text() );
            $Test->nlog( "Raw output was:\n" . $resp->get_raw_output() );
            $Test->nlog("Hammer abort has not done properly");
        };
    }

    ##################################################################################################
    ##                                                                                              ##
    ##STEP 10 - Stop any disk scrub that might be running                                           ##
    ##                                                                                              ##
    ##################################################################################################
    foreach my $Node (@Nodes) {
        my $API_obj = $Node->get_7m_or_nodescope_apiset();
        my $FILER_C = $Node->name();
        tahiti_eye_catcher( Test => $Test, string => "STEP 10 of 11 : $FILER_C :Stop any disk scrub that might be running" );
        try {
            $API_obj->execute_command( 'command' => 'disk scrub stop' );
        }
        catch NACL::APISet::Exceptions::ResponseException with {
            logcomment("Disk srub cannot be stopped");
        };
    }

    ##################################################################################################
    ##                                                                                              ##
    ##STEP 11 - Pull BSAS link speed logs                                                           ##
    ##                                                                                              ##
    ##################################################################################################
    # No /g here: a /g match in boolean context is stateful via pos().
    if ( $TEST_SETUP !~ /FC/i ) {
        foreach my $Node (@Nodes) {
            my $API_obj = $Node->get_7m_or_nodescope_apiset();
            my $FILER_C = $Node->name();
            tahiti_eye_catcher( Test => $Test, string => "STEP 11 of 11 : $FILER_C :Pulling BSAS link speed logs" );
            my $stsb_status = get_filer_check( Node => $Node );
            $sto_show_data = storage_show_disk_a_data( Node => $Node );

            # Collect the SM/SA/SDB disks if it is BSAS
            my ( $ssm, $ssa, $sdb ) = ( 0, 0, 0 );
            my @temp = grep { !/P\d+$/ } @disk_list;    # whole drives only, no partitions
            my $dsks = _print_array( \@temp );
            logcomment( "Filer : $FILER_C : Checking Model number of the disks mentioned below:\n" . $dsks );

            foreach my $disk (@temp) {
                my $mdl = $sto_show_data->{$disk}->{'Model'};
                logcomment("Filer : $FILER_C : Model of disk $disk is $mdl");

                # The model-name suffix selects which diagnostic command is issued.
                if ( $mdl =~ /(SA)$/ ) {
                    my $suffix = $1;
                    $ssa = 1;
                    logcomment("SA: $disk - $suffix");
                    logcomment("Filbert(SSA): Writing crash dump to file of the $disk - type $suffix");
                    $API_obj->execute_command( 'command' => "$stsb_status get_crash_dump $disk" );
                }
                elsif ( $mdl =~ /(SM)$/ ) {
                    my $suffix = $1;
                    $ssm = 1;
                    logcomment("SM: $disk - $suffix");
                    logcomment("Acorn(SSM): Scsi modesense of the disk $disk - type $suffix");
                    $API_obj->execute_command( 'command' => "scsi modesense -p 0x3a $disk" );
                }
                elsif ( $mdl =~ /(SDB)$/ ) {
                    my $suffix = $1;
                    $sdb = 1;
                    print "SSD: $disk - $suffix\n";
                    logcomment("Marvell(SDB): Acorn get status of the disk $disk - type $suffix");
                    $API_obj->execute_command( 'command' => "$stsb_status get_status $disk" );
                }
            }

            if ( ( $ssm == 0 ) && ( $ssa == 0 ) && ( $sdb == 0 ) ) {
                logcomment("Step 11: No SM/SA/SDB drives found - Exiting the Step 11");
            }
        }    # end foreach my $Node (@Nodes)
    }
    else {
        tahiti_eye_catcher( Test => $Test, string => "STEP 11 of 11 : Pulling BSAS link speed logs" );
        logcomment("These additional steps are only for BSAS and MSATA drives");
    }

    # Tear down whatever non-root aggregates the test created.
    logcomment("Filer : $FILER_C : Deleting Existing Aggregate(s) and volume(s)");
    foreach my $Node (@Nodes) {
        ( $non_rt_status, @non_rt_agg ) = nvme_check_non_root_aggr( Node => $Node );
        if (@non_rt_agg) {
            vserver_delete( Node => $Node );    #Vserver deletion
            aggregate_volume_delete( Node => $Node );
            my $Api_Set_Obj = $Node->get_7m_or_nodescope_apiset( connid => 'console' );
            $Api_Set_Obj->execute_raw_command( command => "run local disk zero spares" );
            sleep 20;
            $Api_Set_Obj->execute_raw_command( command => "\013" );
        }
    }

    ###########################################################################################
    ## Test log/results
    ###########################################################################################
    return logresult( "INFO", msg => $test_status );
}

#####################################################################
# Cleanup - Post Process
# Refreshes the node objects through node_new_obj() and runs the NVMe
# post-test process, requesting the "MAINT" state. Returns $TCD::PASS.
#####################################################################
sub cleanup() {
    @Nodes = node_new_obj( node_array => [@Nodes] );
    logcomment(" $filer_names - Clean up and post test process");
    my $node_ref = \@Nodes;

    ###########################################################################################
    ## Post Test process - Category : "post_test"
    ###########################################################################################
    nvme_post_test(
        node_present    => $node_ref,
        Test            => $Test,
        change_state_to => "MAINT",
        filer_mode      => $Mode,
        Tahiti          => "yes"
    );
    return $TCD::PASS;
}