#!/usr/software/bin/perl
# $Id: //depot/prod/test/Rsierranevada/storage/hdd/NADQ_SEA/BR/NADQ02_ERT_Fsys_Reconstruction.thpl#2 $
#
## @summary Mode
## ERT file system tests
#
##
## @description
## The basic process to follow is to maintain the HW configuration of the systems from the Mode 1 testing.
## Reboot the filers and create file systems on the three Configurations as called out in the
## reference tables for systems ERT-1, ERT-2 and ERT-3. Then start a test and let it run overnight.
## Next morning, stop the hammer test and any error injection that is running, and run the verify script.
## Start a new test and continue. The test operator should make sure to add his email address to the
## autosupport.to list so that he can monitor any failures.
##
## @Test Mode
## File System mode
##
## @Test bed setup
## SAS : Dual Path
##
## @usage
## The test can be run independently or with other tests as part of an STEST.
##
## @step in days
## The test will execute the commands mentioned below:
##
## Day 43 Start hammer 65536
##        Start disk scrub
##        Fail a single one of the parity drives
##        disk unfail
##        Start disk scrub
##        Wait for scrub to stop
##        Stop hammer
##        Take logs
#
##
## @param FILER                   - required for cluster setup
##                                - optional for Dual path setup. Name of filer to be used
## @param TEST_SETUP              - optional. Default value set to 'SAS'
## @param FILER_CONN              - optional. Type of connection, default is set to 'rsh' for this test
## @param MODE                    - optional. Filer mode maint/normal, default set to 'normal' for this test
## @param FILER_PROMPT            - optional. Prompt of the filer
## @param LOGDIR                  - optional. This is required to generate 'END-LOG'. Default set to main log directory.
## @param EOE                     - optional. 'default' is set to default value
## @param FILER_TYPE              - Required to set DataONTAP version. DataONTAP version is either IC or BR
## @param ARMADILLO               - This parameter will indicate if an ARMADILLO configuration is under testing (if yes, ARMADILLO=1 else ARMADILLO=2)
## @param BOOT_MODE               - optional. Boot mode for filer, default set to '1' for this test
## @param BOOT_TYPE               - optional. Boot type, default set to 'A' (automatic) for this test
## @param MAX_DRIVES_TO_FAIL      - optional. Default is set to 1. Represents max no of parity drives to be failed.
## @param MAX_DATA_DRIVES_TO_FAIL - optional. Default is set to 1. Represents max no of data drives to be failed.
## @param MAX_SCRUB_OPERATIONS    - optional. Default is set to 1. Represents max no of scrub operations to be performed.
##
## @status Automated
## @author abalaji@netapp.com
## @burt 1003406
## @burt 541139
## @change 11/10/28 from mcb: Step 20 added.
## @burt 541139
## @change 11/11/10 from mcb: Step 8 modified.
## @burt 555549
## @change 11/12/21 from arunak: Step 10 added (disk copy).
##
##
#############################################################################

########################################
### Library functions
##########################################

# Compiler directives.
use strict;

# Module imports
# C MODE MODULES imports
use Storage::Common_Lib;
use POSIX;

########################################
### Initialization/declaration
#########################################
use vars qw(
  $Test
  $MODE
  $filer_names
  $TC_name
  $EOE
  $LOGDIR
  $MAIL_TO
  $MAIL_FROM
  $EMAIL
  $FILER_TYPE
  $HYBRID_AGGR
  $FILER
  $TEST_CONFIG
  $TEST_SETUP
  $FILER_PROMPT
  $TEST_WAIT_TIME
  $BOOT_MODE
  $BOOT_TYPE
  $ERT_VOL
  $MAX_DRIVES_TO_FAIL
  $MAX_SCRUB_OPERATIONS
  $MAX_DATA_DRIVES_TO_FAIL
  $Primary_Node
);

my $params = NATE::ParamSet->new( global => 1 );
$FILER                   = $params->get( 'FILER',                   default => 'Filer' );
$MODE                    = $params->get( "MODE",                    default => 'normal' );
$TEST_CONFIG             = $params->get( 'TEST_CONFIG',             default => 'D' );
$TEST_WAIT_TIME          = $params->get( 'TEST_WAIT_TIME',          default => '3' );
$LOGDIR                  = $params->get( 'LOGDIR',                  default => undef );
$EOE                     = $params->get( 'EOE',                     default => 'default' );
$EMAIL                   = $params->get( 'EMAIL',                   default => 'y' );
$MAIL_TO                 = $params->get( 'MAIL_TO',                 default => 'Email to' );
$MAIL_FROM               = $params->get( 'MAIL_FROM',               default => 'Email from' );
$FILER_PROMPT            = $params->get( 'FILER_PROMPT',            default => '\*>' );
$TEST_SETUP              = $params->get( 'TEST_SETUP',              default => 'SAS' );
$BOOT_MODE               = $params->get( 'BOOT_MODE',               default => '1' );
$BOOT_TYPE               = $params->get( 'BOOT_TYPE',               default => 'A' );
$FILER_TYPE              = $params->get( 'FILER_TYPE',              default => 'BR' );
$Primary_Node            = $params->get( 'Primary_Node',            default => 'N' );
$ERT_VOL                 = $params->get( 'ERT_VOL',                 default => '1' );
$MAX_DRIVES_TO_FAIL      = $params->get( 'MAX_DRIVES_TO_FAIL',      default => '1' );
$MAX_DATA_DRIVES_TO_FAIL = $params->get( 'MAX_DATA_DRIVES_TO_FAIL', default => '1' );
$MAX_SCRUB_OPERATIONS    = $params->get( 'MAX_SCRUB_OPERATIONS',    default => '1' );

# NOTE: this lexical shadows the package variable of the same name declared
# via "use vars" above; every use in this file refers to the lexical.
my $TC_name = "NADQ02_ERT_Fsys_Reconstruction_43";

# 7Mode Global Variables
my %filer_del_aggr = ();
my $vol_flag;
my @non_root;
my $filer;
my $filer_con;
my $bootfile;
my @filers;
my @filers_rsh;
my $fail_cnt;
my $filer_con_rsh;

#my $faildrives;
my %fail_data_drives   = ();
my %fail_parity_drives = ();

#Cmode Global variables
# Hashes are initialised with an empty list. The previous "= {}" stored a
# hash reference as a key ("HASH(0x...)") instead of creating an empty hash.
my @Nodes;
my $test_status  = 0;
my %sysconfig_v2 = ();
my %fcal_lk_s6   = ();
my %fcal_lk_s18  = ();
my %s_admin_s6   = ();
my %s_admin_s19  = ();
my ( $email_subject, $email_body );
my $channelcount;
my @temp_channels;
my %filer_nvol = ();
my $disk_step1;
my $disk_step4;

#########################################
# Testcase available for execution
#########################################
my @Testcases = ( ERT_Fsys_Reconstruction => "ERT_Fsys_Reconstruction" );

# End log parameters
my $log_comments = "$LOGDIR/$TC_name" . "\.log";
my $log_console  = "$LOGDIR/$TC_name" . ".END_FILER_CONFIG" . ".log";
my $Home         = $ENV{HOME};
my %nodes_filer  = ();
my $Mode;

#########################################
## Pre-test processes
##########################################
# main() must be invoked after the file-level lexicals above are initialised.
main();

# Entry point: builds the Test Case Driver (TCD) object for @Testcases and
# runs it. Returns $TCD::FAIL if the driver reports an error, otherwise exits
# the script with status 0.
sub main {

    # Debug break point
    $DB::single = 2;

    # Create Test Case Driver object
    $Test = TCD->new( -testcases => [@Testcases] );

    # Testcases will be executed using TCD object.
    if ( $Test->error ) {
        $Test->log( $Test->errmsg );
        return $TCD::FAIL;
    }

    # Performs method callbacks
    $Test->run_test();
    if ( $Test->error ) {
        $Test->log( $Test->errmsg );
        return $TCD::FAIL;
    }
    exit(0);
}    ## end sub main

########## INIT ###################################################
# init automatically called before executing tests
####################################################################
# Collects the node objects used by the test into @Nodes (unique by node
# name, ordered by name) and records the execution mode of the first node
# in $Mode. Returns $TCD::PASS, or $TCD::FAIL when no node is found.
sub init {
    $Test->description(" Initialising all required variables and FILER connections ");
    $filer_names = $Test->get_param("FILER");    # Capturing Filer names from the param(Test_Suite)
    logcomment("Filers present are $filer_names");

    ##Check for duplicate node object and push unique node object in Nodes array.
    my @temp_nodes = NACL::C::Node->find();      # Find Nodes/Filers used in the test, Based on FILER param.
    foreach my $Node (@temp_nodes) {
        my $FILER_C = $Node->name();
        $nodes_filer{$FILER_C} = $Node;
    }

    # Contains Node object used for test execution. Ordered by node name so
    # that $Nodes[0] is deterministic (hash order is not).
    @Nodes = map { $nodes_filer{$_} } sort keys %nodes_filer;
    if ( !@Nodes ) {
        logcomment("**FATAL** : No nodes found for the test, FILER param is $filer_names");
        return $TCD::FAIL;
    }

    logcomment("Checking for execution mode");
    $Mode = $Nodes[0]->mode();
    version_test( node_present => \@Nodes, tc_name => $TC_name );
    return $TCD::PASS;
}

########## SETUP ###################################################
# Setup automatically called before executing tests
#####################################################################
# Runs the common pre-test process on all nodes. Returns $TCD::PASS.
sub setup {
    $Test->description("Setup the environment for the test exectuion ");
    logcomment("Mode of filer $filer_names : $Mode");
    my $node_ref = \@Nodes;

    #####################################################################
    # Pre test proces : call for pre_n_post test process
    #####################################################################
    execute_pre_test(
        node_present    => $node_ref,
        Test            => $Test,
        change_state_to => "CLI",
        filer_mode      => $Mode,
        LOGDIR          => $LOGDIR,
        BOOT_TYPE       => $BOOT_TYPE,
        test_setup      => $TEST_SETUP,
    );
    return $TCD::PASS;
}

########## TEST 1 ###################################################
#ERT_Fsys_Reconstruction
#####################################################################
# Starts one background subtest per node, waits for all of them and maps the
# combined status to $TCD::PASS / $TCD::FAIL.
sub ERT_Fsys_Reconstruction {
    my @subtests;
    logcomment( "Number of nodes are " . scalar @Nodes . " and the filers are $filer_names" );
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        push( @subtests, subtest( \&ERT_Fsys_Reconstruction_sub, -runid, "ERT_Fsys_Reconstruction_$FILER_C", -bg, "--", $Node ) );
    }
    Subtest::wait_finish( subtest => [@subtests] );
    my $return = status_return( subtest_ref => [@subtests] );
    return ( $return == 0 ) ? $TCD::PASS : $TCD::FAIL;
}

#######################
##Subtest_create
#######################
# Per-node test body (8 steps): start hammer, fail parity drive(s) and wait
# for reconstruction, fail data drive(s) and wait for sick disk copy, record
# RAID configuration, unfail the admin failed drives, stop hammer and collect
# SHM stats.
#   $_[0] - node object to run the steps on.
# Returns the value of logresult("INFO", msg => <status>); status 1 is used
# on every fatal path and whenever a step sets $test_status.
sub ERT_Fsys_Reconstruction_sub {
    my @Nodes;
    push( @Nodes, shift(@_) );
    $Test->description(" ERT_Fsys_Reconstruction");
    eye_catcher( Test => $Test, string => "Starting execution of test steps. Total steps to be executed as part of the script are: 8" );

    ###########################################################################################
    ## Set test specific options and values
    ###########################################################################################
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name;
        logcomment("$FILER_C : Turn off the raid options");
        _set_raid_scrub_options( $Node, 'off' );
    }

    logcomment("Executing sysconfig -r at beginning of script");
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        logcomment("$FILER_C - Executing sysconfig -r");

        # Result is not used further; the call is kept for its logged output.
        sysconfig_r_data( node => $Node );
    }

    # my $required_drives = ( ( $ERT_VOL * 5 ) + 5 );
    my $required_drives = ( ( $ERT_VOL * 3 ) + $MAX_DRIVES_TO_FAIL + $MAX_DATA_DRIVES_TO_FAIL + 2 );
    logcomment("The user-selected inputs are as follows:\nERT_VOL = $ERT_VOL\nMAX PARITY FAILED DRIVES = $MAX_DRIVES_TO_FAIL \nSCRUB OPERATIONS = $MAX_SCRUB_OPERATIONS \nMAX DATA DRIVES TO FAIL = $MAX_DATA_DRIVES_TO_FAIL");

    foreach my $Node (@Nodes) {
        my $FILER_C     = $Node->name();
        my $Api_Set_Obj = $Node->get_7m_or_nodescope_apiset();
        my $disk_filer  = disk_show_drive_maint( API_Obj => $Api_Set_Obj, Filer => $FILER_C );
        my @disk_filer  = @{ $disk_filer->{'driveA'} };
        logcomment( "$FILER_C : Total number of drives present in the system are " . scalar @disk_filer . " and the drives are @disk_filer" );
        logcomment("$FILER_C : Minimum required drives for the test are $required_drives");
        if ( $required_drives > scalar @disk_filer ) {
            logcomment("**FATAL** Total number of drives present in the system are less than the required drives");
            return logresult( "INFO", msg => 1 );
        }
        my @spares    = spare_count_cmode( Node => $Node, required_spare => $required_drives );
        my $spare_cnt = @spares;
        logcomment("$FILER_C : Spare count is $spare_cnt");

        my @DISKS;
        ##############################
        #Checking for non root volumes
        ##############################
        my @non_root = check_non_root_volumes( Node => $Node );
        logcomment("Non root volumes are not present") if ( !@non_root );
        logcomment( "Total number of Non-root volumes already exist are " . scalar @non_root . " and the volumes are @non_root" ) if (@non_root);
        my $non_root = scalar @non_root;

        if ( $non_root < $ERT_VOL ) {
            my $rest = ( $ERT_VOL - $non_root );
            logcomment("Number of non-root volumes required to be created are $rest");
            ##############################
            #Checking for available spares
            ##############################
            my @spares = spare_count_cmode( Node => $Node );
            logcomment( "$FILER_C : Total Spare count on filer is " . scalar @spares );
            my $leave_spare = $MAX_DRIVES_TO_FAIL + $MAX_DATA_DRIVES_TO_FAIL;
            logcomment("Parity drives to be failed is $MAX_DRIVES_TO_FAIL\nData drives to be failed is $MAX_DATA_DRIVES_TO_FAIL\nLeaving $leave_spare spares for reconstruction");
            my $spare_cnt = scalar @spares - $leave_spare;
            logcomment("Now, the spares left for volume creation is $spare_cnt") if ( $spare_cnt >= 0 );
            logcomment("Spare drives present on the system are less than the required for the volume creation") if ( $spare_cnt < 0 );

            # More than three spares per volume to be created are needed.
            # The message now reports the threshold actually enforced here
            # (it used to print $rest * 5).
            if ( ( $rest * 3 ) >= $spare_cnt ) {
                logcomment( "**FATAL**: $FILER_C - Spare drives present are $spare_cnt which is less than the required number i.e " . ( $rest * 3 + 1 ) );
                return logresult( "INFO", msg => 1 );
            }
            my $drive_cnt = floor( $spare_cnt / $rest );
            logcomment("Drives involved in one volume creation will be $drive_cnt");
            create_flex_vol(
                Node => $Node,

                # disk_list => \@spares,
                diskcount => $drive_cnt,
                size      => '95'
            );
            logcomment("$FILER_C : Checking the Spare count on the filer");
            $spare_cnt = spare_count_cmode( Node => $Node, run_local => 'YES' );
            logcomment( "Total Number of spare drives left are " . $spare_cnt );
            @non_root = check_non_root_volumes( Node => $Node );

            # $non_root_final{$FILER_C} = [@non_root];
            logcomment("Non root volumes are not present") if ( !@non_root );
            logcomment( "Total number of Non-root volumes already exist are " . scalar @non_root . " and the volumes are @non_root" ) if (@non_root);
            if ( scalar @non_root < $ERT_VOL ) {
                logcomment("**FATAL**: $FILER_C: Volumes created are less than the required number of volumes");
                return logresult( "INFO", msg => 1 );
            }

            # A "last" used to sit here from a removed retry loop; it would
            # have terminated the per-node loop, so it has been dropped.
        }    # end of if $non_root < $ERT_VOL
        else {
            ################################
            #Checking for available spares
            ################################
            my $spare_cnt = spare_count_cmode( Node => $Node, run_local => 'YES' );
            my @aggregates;
            try {
                my @tot_aggr = NACL::CS::StorageAggregate->fetch( command_interface => $Node, filter => { node => [$FILER_C], 'root' => 'false' } );
                foreach my $aggr (@tot_aggr) {
                    push( @aggregates, $aggr->aggregate );
                }
                logcomment("$FILER_C :: Non Root aggregates are - @aggregates");
            }
            catch NACL::Exceptions::NoElementsFound with {
                logcomment("$FILER_C : Non Root aggregates are not found in the filer");
            };
            foreach my $aggr (@aggregates) {
                my $disk_obj = NACL::CS::StorageAggregate->fetch( command_interface => $Node, filter => { 'aggregate' => "$aggr", }, requested_fields => ['disklist'] );
                my @disks = $disk_obj->disklist();
                push( @DISKS, @disks ) if (@disks);
                logcomment( "In the aggregare : $aggr total number of drives present are :: " . scalar @disks . " and the drives are @disks" );
            }
            logcomment( "$FILER_C : Total number of drives present in all the aggregates are " . scalar @DISKS . " and the drives are @DISKS" );
            my $aggregate_drives = scalar @DISKS;
            logcomment("$FILER_C : Total Spare count on filer is $spare_cnt");
            my $leave_spare = $MAX_DRIVES_TO_FAIL + $MAX_DATA_DRIVES_TO_FAIL;
            logcomment("Parity drives to be failed is $MAX_DRIVES_TO_FAIL\nData drives to be failed is $MAX_DATA_DRIVES_TO_FAIL\nLeaving $leave_spare spares for reconstruction and three extra");
            my $spare_req = $leave_spare + 3;
            logcomment("Total number of drives which are required to run the test is : $spare_req");

            if ( $spare_req > ( $spare_cnt + $aggregate_drives ) ) {
                logcomment( "**FATAL** : $FILER_C - Total number of drives are not sufficient to run the test i.e " . $spare_req );
                return logresult( "INFO", msg => 1 );
            }

            #aggregate_volume_delete( Node => $Node );
            #my @spares = spare_count_cmode( Node => $Node );
            logcomment( "Total number of spare drives " . scalar @spares );
        }    #end of else
    }

    #-----------------------------------------------------------------------
    ## Step 1 - Run hammer threads.
    ## Pass/Fail criteria: Verify that hammer is running.
    #----------------------------------------------------------------------
    my %hammer_volumes = ();
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 1 of 8 : $FILER_C : Run hammer threads" );
        my @volumes = _check_non_root($Node);
        $filer_nvol{$FILER_C} = [@volumes];

        # Volume names are strings: order them with a string comparison
        # (the numeric "<=>" used before treats every name as 0).
        my @n_root_vol = sort { $a cmp $b } @volumes;
        @{ $hammer_volumes{$FILER_C} } = splice( @n_root_vol, 0, $ERT_VOL );
        logcomment("Volumes on which hammer is running @{$hammer_volumes{$FILER_C}}");
        foreach my $vol ( @{ $hammer_volumes{$FILER_C} } ) {
            my $return = hammer_start( Node => $Node, file_name => "/vol/$vol/h0", file_size => '65536', Test => $Test );
            if ( $return == 0 ) {
                logcomment("Hammer Started");
            }
            else {
                logcomment("**FATAL** : $FILER_C - Could not Start HAMMER with the file size 65536");
                return logresult( "INFO", msg => 1 );
            }
        }
        $disk_step1 = spare_count_cmode( Node => $Node, run_local => 'YES', );
    }

    #--------------------------------------------------------------------------------
    ## Step 2 - Fail a single of the parity drives. The Volume should start to reconstruct.
    ## Wait for the reconstruction to complete. Do this for upto 5 volumes depending on # of sparesCheck for errors.
    ## Pass/Fail criteria: No Check Conditions for any drive
    #--------------------------------------------------------------------------------
    my %req_vols = ();
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 2 of 8 : $FILER_C : Fail a single of the parity drives. The Volume should start to reconstruct" );
        @{ $req_vols{$FILER_C} } = @{ $hammer_volumes{$FILER_C} };
        my @parity_vol = splice( @{ $hammer_volumes{$FILER_C} }, 0, $MAX_DRIVES_TO_FAIL );

        # The helper has already logged the **FATAL** reason; stop here as
        # that message promises ("skipping rest of the test steps").
        if ( !_start_recons( $Node, $FILER_C, $Test, $TC_name, \@parity_vol ) ) {
            return logresult( "INFO", msg => 1 );
        }
    }

    #--------------------------------------------------------------------------------
    ## Step 3 - Fail a drive from a raid group. Sick disk copy should start. Wait for the copy to complete.
    ## Do this for upto 3 volumes depending on # of sparesCheck for errors
    ## Pass/Fail criteria: No Check Conditions for any drive
    #--------------------------------------------------------------------------------
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 3 of 8 : $FILER_C : Fail a drive from a raid group. Sick disk copy should start. Wait for the copy to complete." );

        # With a single volume the parity step consumed it from
        # %hammer_volumes, so reuse the copy kept in %req_vols.
        my @data_vol;
        if ( $ERT_VOL == 1 ) {
            @data_vol = splice( @{ $req_vols{$FILER_C} }, 0, $MAX_DATA_DRIVES_TO_FAIL );
        }
        elsif ( $ERT_VOL > 1 ) {
            @data_vol = splice( @{ $hammer_volumes{$FILER_C} }, 0, $MAX_DATA_DRIVES_TO_FAIL );
        }
        logcomment("The number of data drives to be failed as given by user is $MAX_DATA_DRIVES_TO_FAIL");
        if ( !_start_disk_copy( $Node, $FILER_C, $Test, $TC_name, \@data_vol ) ) {
            return logresult( "INFO", msg => 1 );
        }
    }

    #--------------------------------------------------------------------------------
    ## Step 4 - Record system RAID configuration details.
    #--------------------------------------------------------------------------------
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 4 of 8 : $FILER_C : Record system RAID Configuration details" );
        $disk_step4 = sysconfig_r_data( node => $Node );

        # NOTE(review): $disk_step1 is a spare count (spare_count_cmode in
        # scalar context) while $disk_step4 is whatever sysconfig_r_data
        # yields in scalar context; elsewhere in this file it is unpacked as
        # a two-element list. Confirm that these two values are comparable.
        if ( $disk_step1 >= $disk_step4 ) {
            $test_status = 1;
            logcomment("**FAIL** : For $FILER_C the Parity drive are not replaced with the available spare drive = STEP : 4");
        }
        else {
            logcomment("For $FILER_C the Parity drive are replaced with the available spare drive");
        }
    }
    logcomment("System raid configuration details recorded.");

    #----------------------------------------------------------------------------------
    ## Step 5 - Unfail the < admin failed drive>
    ## Pass/Fail criteria: Make sure the Admin failed drive becomes a spare drive.
    #----------------------------------------------------------------------------------
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 5 of 8 : $FILER_C : Unfail the admin failed drive." );
        my $API_obj = $Node->get_7m_or_nodescope_apiset();
        my @broken;
        my $output = $API_obj->execute_command( 'command' => 'sysconfig -r' );
        foreach my $line ( split( /\n/, $output ) ) {
            if ( $line =~ /(admin failed|admin removed)\s+(\S+)\s+(\S+)\s+/i ) {
                my $dsk = $2;
                logcomment("Admin Failed disk present - $dsk");
                push( @broken, $dsk );
            }
        }
        if (@broken) {
            logcomment("Failed disk is present, Unfailing disks - @broken ");
        }
        foreach my $broken (@broken) {
            try {
                NACL::C::StorageDisk->unfail( command_interface => $Node, disk => $broken, spare => "true", 'method-timeout' => 1200 );
            }
            catch NACL::APISet::Exceptions::CommandFailedException with {
                logcomment("**WARNING** : $FILER_C - Could not unfail the disk $broken, Please check manually");
            }
            catch NATE::BaseException with {
                logcomment("**WARNING** : $FILER_C - Unfailing the drive is interupted, may be because of timeout issue, So trying again to unafail after 5 minutes");
                sleep 300;
                try {
                    NACL::C::StorageDisk->unfail( command_interface => $Node, disk => $broken, spare => "true", 'method-timeout' => 1200 );
                }
                catch NATE::BaseException with {
                    logcomment("$FILER_C : Unfailing the drive is not successfull, Please check manually");
                };
            };
        }

        # Look for every unfailed drive in the spare list, re-checking up to
        # three times with a 120 second pause between checks.
        my $max_checks = 3;
        my @missing;
        foreach my $attempt ( 1 .. $max_checks ) {
            my @spare_after = spare_count_cmode( Node => $Node, run_local => 'YES', );
            my %is_spare = map { _shelf_dev_id($_) => 1 } @spare_after;
            @missing = ();
            foreach my $unfailed_drive (@broken) {
                logcomment("DEBUG :: Checking the unfailed drive $unfailed_drive in the spare list");
                if ( $is_spare{ _shelf_dev_id($unfailed_drive) } ) {
                    logcomment("Unfailed disk $unfailed_drive found in the list of spare drives");
                }
                else {
                    push( @missing, $unfailed_drive );
                }
            }
            last if ( !@missing );
            if ( $attempt < $max_checks ) {
                logcomment("Unfailed disk @missing not found in the list of spare drives. Rechecking");
                sleep(120);
            }
            else {
                logcomment("Checking of unfailed disk in spare disk list is completed. Unfailed disk @missing not found in the list of spare drives after checking for 3 times also");
            }
        }
        if (@missing) {
            $test_status = 1;
            logcomment("**FAIL** : All the unfailed drives does not appear in the list of spare drives - STEP : 5");
        }
        else {
            logcomment("All Admin failed drives have become spare");
        }
    }

    #------------------------------------------------------------
    ## Step 6 - Stop hammer application.
    ## Pass/Fail criteria: Verify that Hammer has stopped
    #------------------------------------------------------------
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 6 of 8 : $FILER_C : Stop hanner application" );
        my $API_obj = $Node->get_7m_or_nodescope_apiset();
        try {
            $API_obj->hammer_abort();
        }
        catch NACL::APISet::Exceptions::ResponseException with {
            my $exception_object = shift;
            my $resp             = $exception_object->get_response_object();
            logcomment( "Caught a " . ref($exception_object) . "!" );
            logcomment( "Error is: " . $exception_object->text() );
            logcomment( "Raw output was:\n" . $resp->get_raw_output() );
            logcomment("Hammer abort has not done properly");
        };
        sleep(60);
    }

    #------------------------------------------------------------
    ## Step 7 - Collect SHM Stats.
    ## Pass/Fail criteria: N/A.
    #------------------------------------------------------------
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 7 of 8 : $FILER_C - Collect SHM Stats" );
        my $API_obj = $Node->get_7m_or_nodescope_apiset();
        $API_obj->execute_command( command => "disk shm_stats" );
    }

    #------------------------------------------------------------
    ## Step 8 - Collect SHM Stats asup.
    ## Pass/Fail criteria: N/A.
    #------------------------------------------------------------
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name();
        eye_catcher( Test => $Test, string => "STEP 8 of 8 : $FILER_C - Collect SHM Stats asup" );
        my $API_obj = $Node->get_7m_or_nodescope_apiset();
        $API_obj->execute_command( command => "disk shm_stats asup" );
    }

    ###########################################################################################
    ## Test log/results
    ###########################################################################################
    logcomment("Test $TC_name is completed.");
    return logresult( "INFO", msg => $test_status );
}    #end of sub test ERT Fsys Reconstruction

#####################################################################
# Cleanup - Post Process
#####################################################################
# Re-enables the RAID scrub options that the test switched off and runs the
# common post-test process. Returns $TCD::PASS.
sub cleanup {
    logcomment(" $filer_names - Clean up and post test process");
    @Nodes = node_new_obj( node_array => [@Nodes] );
    my $node_ref = \@Nodes;

    ###########################################################################################
    ## Delete/Reset test specific :options and values
    ###########################################################################################
    foreach my $Node (@Nodes) {
        my $FILER_C = $Node->name;
        logcomment("$FILER_C : Turn on the raid options");
        _set_raid_scrub_options( $Node, 'on' );
    }

    ###########################################################################################
    ## Post Test process - Category : "post_test"
    ###########################################################################################
    execute_post_test(
        node_present    => $node_ref,
        Test            => $Test,
        change_state_to => "CLI",
        filer_mode      => $Mode,
        LOGDIR          => $LOGDIR,
        BOOT_TYPE       => $BOOT_TYPE,
        test_setup      => $TEST_SETUP,
    );

    # Disabled end-log capture, kept for reference:
    #    foreach my $Node (@Nodes) {
    #        my $FILER_C = $Node->name;
    #        end_log(
    #            source_log   => $log_comments,
    #            end_log      => $log_console,
    #            start_string => "Capture for $FILER_C : Starts",
    #            end_string   => "Capture for $FILER_C : Ends"
    #        );
    #    }

    return $TCD::PASS;
}

#*************************************************************************
##Sub-routines used in the test.
#*************************************************************************

# Sets the three RAID scrub related options on a node to the given value.
#   $Node  - node object used as command interface
#   $value - option value to set ('on' or 'off' in this file)
sub _set_raid_scrub_options {
    my ( $Node, $value ) = @_;
    foreach my $option ( 'raid.media_scrub.enable', 'raid.media_scrub.spares.enable', 'raid.scrub.enable' ) {
        NACL::C::Options->option( command_interface => $Node, 'option-name' => $option, 'option-value' => $value );
    }
    return;
}

# Reduces a dot-separated disk name to the two components used for
# comparison: the first two when the name has no third component, otherwise
# the second and third.
sub _shelf_dev_id {
    my ($disk) = @_;
    my @ids = split( /\./, $disk );
    if ( !defined $ids[2] || $ids[2] eq '' ) {
        return $ids[0] . "." . $ids[1];
    }
    return $ids[1] . "." . $ids[2];
}

# Returns the list of volume names on the node for which 'vsroot' is false;
# an empty list when none are found. When $vol_flag is 1 the aggregates of
# those volumes are remembered in %filer_del_aggr.
sub _check_non_root {
    my $Node    = $_[0];
    my $FILER_C = $Node->name();
    my $filter  = 'vsroot';
    my @aggregates;
    my @non_root = ();

    # The result is returned after the try/catch: a "return" inside the try
    # block would only leave that block, not this sub.
    try {
        ##Checking for Non Rooot Volumes
        my @tot_vol = NACL::C::Volume->find( command_interface => $Node, filter => { node => $FILER_C, $filter => 'false' } );
        foreach my $vol (@tot_vol) {
            push( @aggregates, $vol->get_one_state_attribute('aggregate') );
            push( @non_root,   $vol->volume() );
        }

        #*************************************************************************
        #Storing the created aggregates to delete later at the end of Test.
        #IF flag $vol_flag value is 1 we are creating volumes that needs to be deleted at the end.
        #If aggregates are deleted respective volumes also wil get delete.
        #*************************************************************************
        if ( $vol_flag && $vol_flag == 1 ) {
            $filer_del_aggr{$FILER_C} = [@aggregates];
        }
        logcomment("$FILER_C : Non Root volumes are - @non_root");
    }
    catch NACL::Exceptions::NoElementsFound with {
        logcomment("$FILER_C : Non Root volumes are not found");
    };
    return @non_root;
}

# Fails parity drive(s) of the given volumes and waits for reconstruction.
#   $Node, $FILER_C - node object and its name
#   $Test, $TC_name - accepted for call compatibility, not used here
#   $vol            - array reference of volume names
# Returns 1 when reconstruction completed, 0 otherwise (reason is logged).
sub _start_recons {
    my ( $Node, $FILER_C, $Test, $TC_name, $vol ) = @_;
    my $numfaildr;
    logcomment("FILER - $FILER_C : Fail a single of the parity drives from available volumes ");
    ( $numfaildr, $fail_parity_drives{$FILER_C} ) = fail_parity_drive( Node => $Node, vol => $vol );
    my @failed = @{ $fail_parity_drives{$FILER_C} || [] };
    logcomment("$FILER_C : Total number of failed drives : $numfaildr");
    logcomment("$FILER_C : List of failed parity drives : @failed");

    if ( !$numfaildr ) {
        logcomment("**FATAL** : $FILER_C : No spare drive is available to copy for failing parity drive, skipping rest of the test steps : ");
        return 0;
    }
    logcomment("$FILER_C : Failed $numfaildr parity drives of filer $FILER_C");

    #Waiting for Reconstruction to complete
    logcomment("Checking for the reconstruction to complete.");
    my $recon = WaitReconstruct( Node => $Node, failed_parity => $numfaildr, );
    if ( $recon == 1 ) {
        logcomment("Reconstruction completed Successfully on $FILER_C");
        logcomment( "FILER - $FILER_C : Reconstruction completed successfully : " . scalar( localtime() ) );
        return 1;
    }
    logcomment("**FATAL** : $FILER_C - Reconstruction not completed successfully on the filer");
    return 0;
}

# Fails data drive(s) of the given volumes and waits for the sick disk copy.
#   $Node, $Filer_l - node object and its name
#   $Test, $TC_name - $Test is not used; $TC_name is passed on to the wait
#   $vol            - array reference of volume names
# Returns 1 when the copy completed, 0 otherwise (reason is logged).
sub _start_disk_copy {
    my ( $Node, $Filer_l, $Test, $TC_name, $vol ) = @_;
    logcomment("FILER - $Filer_l : Fail a single of the data drives from available volumes");
    my $numfaildr;
    ( $numfaildr, $fail_data_drives{$Filer_l} ) = fail_data_drive( Node => $Node, vol => $vol );
    my @failed = @{ $fail_data_drives{$Filer_l} || [] };
    logcomment("$Filer_l : Total number of failed drives : $numfaildr");
    logcomment("List of failed data drives : @failed");

    if ( !$numfaildr ) {
        logcomment( "FILER - $Filer_l : No spare drive is available to copy for failing data drive, skipping rest of the test steps : " . scalar( localtime() ) );
        return 0;
    }

    #Waiting for Disk Copy to complete
    logcomment("Check for the Disk Copying to complete.");
    my $sick_dsk_copy = WaitSickDiskcopyEnd( $Node, $numfaildr, $TC_name, $Filer_l );
    if ( $sick_dsk_copy == 1 ) {
        logcomment("Sick Disk copy completed Successfully on $Filer_l");
        logcomment( "FILER- $Filer_l : Sick Disk copy completed successfully : " . scalar( localtime() ) );
        return 1;
    }
    logcomment("**FATAL** : $Filer_l - Sick Disk copy not completed successfully on the filer");
    return 0;
}

# Polls "sysconfig -r" until no disk copy is reported any more.
#   $Node        - node object
#   $failed_data - number of data drives that were failed
#   $Testname    - accepted for call compatibility, not used here
#   $FILER_C     - node name, used in log messages
# Returns 1 when the copy finished (or nothing was failed), 0 on timeout.
sub WaitSickDiskcopyEnd {
    my ( $Node, $failed_data, $Testname, $FILER_C ) = @_;
    my $timeout = 288000;    # seconds

    if ( !$failed_data || $failed_data <= 0 ) {
        logcomment("WaitSickDiskcopyEnd: Disk copy is completed");
        return 1;
    }

    my $command = ( $Mode eq "CMode" ) ? "run local sysconfig -r" : "sysconfig -r";
    my $endtime = time + $timeout;

    # Initial wait of 200 seconds before the first poll.
    sleep(200);
    while ( time < $endtime ) {
        my $API_obj     = $Node->get_7m_or_nodescope_apiset();
        my $output      = $API_obj->execute_raw_command( "command" => $command );
        my $in_progress = 0;
        foreach my $line ( split( /\n/, $output ) ) {
            if ( $line =~ /-> copy\s+(\S+)\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\s+\S+\(copy\s+(\S+%)\s+completed\)/ ) {
                logcomment("$2 percent of Disk copying has completd on disk $1 ");
            }
            if ( ( $line =~ /copy in progress/ ) || ( $line =~ /-> copy/ ) ) {
                $in_progress = 1;
            }
        }
        if ( !$in_progress ) {
            logcomment("FILER- $FILER_C : Sick Disk copy completed as per the output of sysconfig command");
            logcomment("WaitSickDiskcopyEnd: Disk copy is completed");
            return 1;
        }
        logcomment("FILER- $FILER_C :Sick Disk copy is in progress. Waiting for Sick Disk copying to complete");
        sleep(120);
    }
    logcomment("WaitSickDiskcopyEnd: Disk copy timeout!! Copy of $failed_data failed disk(s) did not complete within $timeout seconds.");
    return 0;
}