# # Copyright (c) 2001-2014 NetApp, Inc., All Rights Reserved # Any use, modification, or distribution is prohibited # without prior written consent from NetApp, Inc. # ## @summary Wafliron Task Module ## @author dl-nacl-dev@netapp.com ## @status shared ## @pod here =head1 NAME NACL::MTask::Wafliron =head1 DESCRIPTION C provides a number of well-defined but potentially complex or multi-step methods related to Wafliron in ONTAP. It also provides the functionality to start wafliron and to retrieve different messages generated during wafliron run. # Start test use NACL::MTask::Wafliron; ... my $wafliron_obj = NACL::MTask::Wafliron->new( command_interface => $command_interface, aggregate => $aggregate, ); # Perform any operation here ... $wafliron_obj->start( nacltask_include_mirrors => 1, .. ); =over =item ATTRIBUTES =over =item "command_interface" => $command_interface ( Required, isa NACL::C::CommandInterface::ONTAP ) A component object that represents the host to which to send commands. See L =item "aggregate" => $aggregate ( Required, SCALAR | OBJECT ) Aggregate component or Aggregate task object or an Aggregate name on which we want to start the wafliron. =item "check_default_scan_speed" => 0 | 1 ( Optional, BOOLEAN, default 1 ) Option to check wafliron default scan speed. If set to 0, default scan speed check will not be performed and object attribute "default_scan_speed" will be un-initialized. =item "node" => $cmd_interface->node() ( Optional, node name ) Node name to capture the event logs from that particular node if needed. By default all the event logs from all the nodes in the cluster will be captured. 
=back =back =cut package NACL::MTask::Wafliron; use base qw(NACL::MTask::MTask); use strict; use warnings; use Readonly; use Tharn qw(sleep logcomment); use NATE::Log qw(log_global); my $Log = log_global(); my $may_enter = $Log->may_enter(); my $may_exit = $Log->may_exit(); use Params::Validate qw(validate_with SCALAR BOOLEAN HASHREF ARRAYREF OBJECT); use NATE::Exceptions::Argument (); use NACL::Exceptions::VerifyFailure (); use NACL::Exceptions::Timeout (); use NATE::BaseException qw(:try); use NACL::APISet::Exceptions::InvalidParamValueException qw(:try); use NACL::C::Cluster; use NACL::C::StorageAggregateWafliron; use NACL::STask::Aggregate; use NACL::CS::StorageAggregate; use NACL::MTask::SystemLogDetector; use NACL::MTask::EventLogDetector; use NACL::MTask::Wafliron::WaflironUtils qw(:HELPER_METHODS); use DateTime; use POSIX 'strftime'; use Data::Dumper; use List::MoreUtils qw(uniq); use NATE::ProcManager; use constant LOCAL_WAFLIRON_TIMEOUT => 2400; use constant WAFLIRON_COMMIT_TIMEOUT => 300; use constant WAFLIRON_POLL_INTERVAL => 10; =head1 METHODS =head2 new my $wafliron_obj = NACL::MTask::Wafliron->new( command_interface => $command_interface, aggregate => $aggregate, ); Creates a new "Wafliron" object, that can be used to invoke all wafliron and ioc operations. 
=cut

# Accessors generated by Class::MethodMaker. The 'new' entry is declared with
# '-hash' and '-init', and 'partner' carries a default constructor that
# fetches the partner name through NACL::CS::StorageFailover.
use Class::MethodMaker [
    new    => [ '-hash', '-init', 'new' ],
    scalar => 'aggregate',
    scalar => [
        { -type => 'NACL::C::CommandInterface::ONTAP' },
        'command_interface'
    ],
    scalar => [ { -type => 'NACL::APISet' }, 'filer_api' ],
    scalar => [
        { -type => 'NACL::C::StorageAggregateWafliron' },
        'wafliron_component'
    ],
    scalar => [
        { -type => 'NACL::MTask::EventLogDetector' },
        'event_log_object'
    ],
    scalar => 'default_scan_speed',
    scalar => [ { -default => 0 }, 'corruption_found' ],
    scalar => 'check_default_scan_speed',
    scalar => 'node',
    scalar => '_wafliron_started',
    scalar => [
        {
            -default_ctor => sub {
                NACL::CS::StorageFailover->fetch(
                    command_interface => $_[0]->command_interface,
                    requested_fields  => [qw(partner-name enabled)],
                    filter            => { node => $_[0]->command_interface },
                    'method-timeout'  => 30
                )->partner_name();
            }
        },
        'partner'
    ],
];

# init( command_interface => ..., aggregate => ..., [check_default_scan_speed
#       => 0|1], [node => $name] )
#
# Initializer invoked with the constructor arguments. It:
#   - validates the arguments,
#   - in CMode, looks up the node owning the aggregate and, when that differs
#     from the node of the supplied command interface, stores a NACL::C::Node
#     for the owner as the command interface,
#   - stores the aggregate (wrapping a plain name in NACL::STask::Aggregate),
#   - creates the apiset objects, the StorageAggregateWafliron component and
#     the EventLogDetector,
#   - records the default scan speed unless check_default_scan_speed is false.
sub init {
    $Log->enter() if $may_enter;
    my $self = shift;

    my %opts = validate_with(
        params => \@_,
        spec   => {
            command_interface =>
              { isa => 'NACL::C::CommandInterface::ONTAP' },
            check_default_scan_speed => { type => BOOLEAN, default  => 1 },
            node                     => { type => SCALAR,  optional => 1 },
            %{ $self->_aggr_name_or_obj_validate_spec() }
        }
    );

    my $scan_speed_check = delete $opts{check_default_scan_speed};

    # Plain delete: "my $x = ... if COND" has undefined behaviour in Perl,
    # and deleting an absent key simply yields undef.
    my $node = delete $opts{node};
    my %event_opts;

    # Find aggregate's host node command_interface for CMode
    if ( $self->_mode() eq 'CMode' ) {
        my $ci   = delete $opts{command_interface};
        my $aggr = ref $opts{aggregate}
          ? $opts{aggregate}->aggregate
          : $opts{aggregate};
        my $node_name = NACL::CS::StorageAggregate->fetch(
            command_interface => $ci,
            filter            => { aggregate => $aggr },
            requested_fields  => ['owner-name'],
        )->owner_name();

        if ( $node_name ne $ci->node() ) {
            my $node_obj = NACL::C::Node->new( name => $node_name );
            $self->command_interface($node_obj);
        }
    }

    if ( ref $opts{aggregate} ) {
        $self->aggregate( $opts{aggregate} );
    }
    else {
        $self->aggregate(
            NACL::STask::Aggregate->new(
                command_interface => $self->command_interface(),
                aggregate         => $opts{aggregate}
            )
        );
    } ## end else [ if ( ref $opts{aggregate...

    $self->_create_apiset_objs();

    $self->wafliron_component(
        NACL::C::StorageAggregateWafliron->new(
            command_interface => $self->command_interface(),
            aggregate         => $self->aggregate()->aggregate()
        )
    );

    # Get the default wafl scan speed
    if ($scan_speed_check) {
        my $scan_speed = $self->_get_default_scan_speed();
        $self->default_scan_speed($scan_speed);
    }

    # Only restrict event-log capture to one node when a node was supplied
    $event_opts{node} = $node if ($node);
    $self->event_log_object(
        NACL::MTask::EventLogDetector->new(
            command_interface => $self->command_interface(),
            %event_opts,
        )
    );

    $Log->exit() if $may_exit;
} ## end sub init

=head2 start

    $wafliron_obj->start( nacltask_include_mirrors => 1, .. );

(Instance method) Starts wafliron on an aggregate. This does not have any return value. Instead, it stores the system log object (for wafliron messages) on the instance with the key "system_log_object". This method waits for wafliron to complete depending on the input param "nacltask_wait".

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate => $aggregate,
    );
    # Perform any operation here
    ...
    $wafliron_obj->start(
        nacltask_include_mirrors => 1,
        nacltask_optional_commit => 'true',
        nacltask_wait => 1, # default is 1
        nacltask_wafl_scan_speed => 1000, # default is 10000
    );

To start wafliron prev-cp mode:

    # Start test
    use NACL::MTask::Wafliron;
    ...
my $wafliron_obj = NACL::MTask::Wafliron->new( command_interface => $command_interface, aggregate => $aggregate, ); # Perform any operation here ... $wafliron_obj->start( nacltask_optional_commit => 'prev_cp', nacltask_prev_cp => { prev_cp_mode => 'semi_interactive', aggregate_response => { $aggr => 1 }, volume_response => { $vol1 => 1, $vol2 => 2 }, incomplete_volume_response => "$vol1,$vol2,$vol3:1,$vol4:1" } nacltask_wait => 1, # default is 1 nacltask_wafl_scan_speed => 1000, # default is 10000 ); =over =item Options =over =item "nacltask_include_mirrors" => 0 | 1 ( Optional ) Option to indicate if snapmirrors need to be included for checking. =item "nacltask_optional_commit" => true | false | prev_cp ( Optional ) Option to indicate whether the iron needs to be run in optional commit mode or not. 1. If optional_commit is true, wafliron runs in Optional Commit mode. 2. If optional_commit is prev_cp, wafliron runs in Optional Commit prev_cp mode. In optional commit prev_cp mode, "aggregate_response", "volume_response" and "incomplete_volume_response" are the mendatory parameters depending on the "prev_cp_mode" (interactive|non_interactive|semi_interactive). 3. If optional_commit is false, regular wafliron run. =item "nacltask_prev_cp" => { prev_cp_mode => 'interactive', .. } ( Optional ) If B<< optional_commit >> is of type prev_cp, then B<< nacltask_prev_cp >> parameter is mandatory. Different attributes of the B<< nacltask_prev_cp >> hash are B<< aggregate_response => { $aggr => 1 } >> (Type: Hashref Optional: No) Response list for aggregate prompt B<< volume_response => { $vol1 => 1, $vol2 => 2 } >> (Type: Hashref Optional: Yes) Response list for volume prompts B<< volume_with_snap_id_list => $scalar >> (Type: Scalar Optional: Yes) Response list for volume prompts where the response contains both volume and snap ids, volumes are separated by ',' and ':' separated volume & inofile selection. Applicable only for prev_cp interactive and semi-interactive mode. 
Example : To pass volume name with snapshot-id in volume selection prompt (e.g. testvol_third:1 ), volume_with_snap_id_list needs to be specified as shown below. |List of volumes on which Wafliron in Previous-Checkpoint mode can run is:\r\n |Node: fas3170-210-03\r\n |Aggregate: dst_aggr\r\n |\r\n |Volume Vserver\r\n |(Ordinal Number) Volume UUID (Name or UUID)\r\n |------------------- ------------------------------------ ------------------\r\ |testvol_second 8b7efa09-4a28-11e0-8b37-123478563412 vs0\r\n |testvol_third 96cd8aa8-4a28-11e0-8b37-123478563412 vs0\r\n |Selection? : |testvol_second,testvol_third:1\r Method call : $wafliron_obj->start( nacltask_optional_commit => 'prev_cp', nacltask_prev_cp => { prev_cp_mode => 'interactive', aggregate_response => { $dst_aggr => 'r' }, volume_response => { $testvol_second => 'r'}, volume_with_snap_id_list => "$testvol_third:1" } ); B<< incomplete_volume_response => $scalar >> (Type: Scalar Optional: Yes) This refers to incomplete volume response list in semi interactive prev_cp mode. If user wants to pass volume response along with the wafliron start command, this parameter needs to be specified. Example : To pass volume response along with the start command, filer::> storage aggregate wafliron start -aggregate test_aggr -optional-commit true -previous-cp vol3:1 Method call : $wafliron_obj->start( nacltask_optional_commit => 'prev_cp', nacltask_prev_cp => { prev_cp_mode => 'semi_interactive', aggregate_response => { $aggr => 'r' }, volume_response => { $vol1 => 1, $vol2 => 2 }, incomplete_volume_response => "$vol3:1" }, ); B<< prev_cp_mode => $scalar >> (Type: Scalar Optional: No) This parameter is mendatory if nacltask_optional_commit is 'prev_cp'. 1. If this is set to B<< non_interactive >>, previous cp will be executed in non-interactive mode. 2. If this is set to B<< interactive >>, previous cp will be executed in interactive mode. 3. 
If this is set to B<< semi_interactive >>, previous cp will be executed in semi-interactive mode. Example: $wafliron_obj->start( nacltask_optional_commit => 'prev_cp', nacltask_prev_cp => { prev_cp_mode => 'semi_interactive', aggregate_response => { $aggr => 'r' }, volume_response => { $vol1 => 1, $vol2 => 2 }, incomplete_volume_response => "$vol1,$vol2,$vol3:1,$vol4:1" }, nacltask_wait => 0 ); =item "nacltask_wait" => 0 | 1 ( Optional, defaults to 1 if not provided ) If nacltask_wait is set to 1, the wafliron start() method will wait for wafliron complete otherwise, return immediately after start. =item "nacltask_force" => 0 | 1 ( Optional, defaults to 0 if not provided ) 7Mode command line option -f. =item "nacltask_check_compressed_data_blocks" => 0 | 1 ( Optional ) 7Mode command line option -c ( Only for RR.0 ). =item "nacltask_volume_to_iron_first" => [ $vol1, $vol2, .. ] ( Optional ) Select which volumes to iron first. 7Mode command line option -v. =item "nacltask_inject_errors" => $scalar ( Optional ) 7Mode command line option -i to inject errors. =item "nacltask_timeout" => $scalar ( Optional ) Timeout in seconds for NACL::MTask::SystemLogDetector. Default is 120 seconds, as set by NACL::MTask::SystemLogDetector. =item "nacltask_wafl_scan_speed" => $scalar ( Optional, defaults to 0 if not provided ) Speed of the wafl scan, should be between -1 and 10000. Default is 10000. scan_speed = 0 means to set the default dynamic scan speed. scan_speed = -1 means to suspend scanning. =item "nacltask_flags_to_set" => { $flag => $value, .. } ( Optional ) A reference to a hash with a list of flags and values to be set before running wafliron. The key is the name of the flag and the value is the value to be set. Example : $wafliron_obj->start( command_interface => $command_interface, nacltask_include_mirror => 1, ... nacltask_flags_to_set => { wafl_iron_panic_on_corruption => 1, wafl_iron_defer_summary_check => 0, ... 
} );

=item "backup" => yes | no | force ( Optional )

Create Backup Image.

=item "use_backup" => true | false ( Optional )

Use Backup Image.

=item "nacltask_iron_start_retry" => $integer (Optional, defaults to 3 if not provided)

Number of attempts to initialize wafliron (to avoid the failures due to transient error while starting iron). This option is applicable only for normal wafliron and not for prev-cp and IOC-RA.

Example :

    $wafliron_obj->start(
        command_interface => $command_interface,
        nacltask_iron_start_retry => 5, # default is 3
        ...
    );

=item "nacltask_nohalt" => true | false (Optional)

Add -nohalt option with wafliron command

=back

=back

=cut

sub start {
    $Log->enter() if $may_enter;
    my $self = shift;

    my %opts = validate_with(
        params => \@_,
        spec   => {
            nacltask_include_mirrors => { type => BOOLEAN, optional => 1 },
            nacltask_optional_commit =>
              { regex => qr/^(true|false|prev_cp)$/, optional => 1 },
            nacltask_prev_cp          => { type => HASHREF, optional => 1 },
            nacltask_wait             => { type => BOOLEAN, default  => 1 },
            nacltask_iron_start_retry => { type => SCALAR,  default  => 3 },
            nacltask_force            => { type => BOOLEAN, default  => 0 },
            nacltask_check_compressed_data_blocks =>
              { type => BOOLEAN, optional => 1 },
            nacltask_volume_to_iron_first =>
              { type => ARRAYREF, optional => 1 },
            nacltask_inject_errors   => { type => SCALAR,  optional => 1 },
            nacltask_timeout         => { type => SCALAR,  optional => 1 },
            nacltask_wafl_scan_speed => { type => SCALAR,  default  => 0 },
            nacltask_flags_to_set    => { type => HASHREF, optional => 1 },
            nacltask_nohalt =>
              { regex => qr/^(true|false)$/, optional => 1 },
            backup     => { regex => qr/^(yes|no|force)$/, optional => 1 },
            use_backup => { regex => qr/^(true|false)$/,   optional => 1 },
            %{ NACL::C::Component->_common_validate_spec_without_ci() }
        }
    );

    # Check if method-timeout is defined. If not, assign default value
    if ( !defined $opts{'method-timeout'} ) {
        $opts{'method-timeout'} = LOCAL_WAFLIRON_TIMEOUT;
    }

    # Resolve the prev_cp starter up front, so that an invalid or missing
    # prev_cp_mode is rejected BEFORE flags/scan speed are changed and before
    # the aggregate is taken offline.
    my $optional_commit = $opts{nacltask_optional_commit};
    my %prev_cp_starter_for = (
        semi_interactive => '_prev_cp_semi_interactive',
        interactive      => '_prev_cp_interactive',
        non_interactive  => '_prev_cp_non_interactive',
    );
    my $prev_cp_starter;
    if ( defined $optional_commit && $optional_commit =~ m[prev_cp]i ) {
        my $prev_cp_mode =
          defined $opts{nacltask_prev_cp}
          ? $opts{nacltask_prev_cp}->{prev_cp_mode}
          : undef;
        $prev_cp_mode = '' if !defined $prev_cp_mode;
        $prev_cp_starter = $prev_cp_starter_for{$prev_cp_mode};
        if ( !defined $prev_cp_starter ) {
            $Log->exit() if $may_exit;
            NATE::Exceptions::Argument->throw(
                "Invalid value - $prev_cp_mode for" . " option prev_cp_mode" );
        }
    }

    # Get partner name
    # (value is not used below; the call is kept so the 'partner' attribute
    # is populated exactly as before)
    my $partner_name = $self->partner();

    # Copy common params (without command interface)
    # NOTE(review): $common_opts is not used further in this method.
    my $common_opts = $self->_hash_copy(
        source => \%opts,
        copy   => [
            keys %{ NACL::C::Component->_common_validate_spec_without_ci() }
        ]
    );

    # Check iron state and throw exception if it is active
    # on the specified aggregate.
    my $wi_status;
    my $poll_required = 1;
    try {
        $wi_status = $self->_get_wafliron_status();
    }
    catch NATE::BaseException with {
        my $e = shift;
        if (  !$e->isa('NACL::Exceptions::NoElementsFound')
            && $e->text() !~ m[No matching wafliron found]i )
        {
            $Log->exit() if $may_exit;
            $e->throw();
        }

        # No wafliron entry exists for the aggregate: nothing to wait for
        $poll_required = 0;
    };

    if ($poll_required) {
        my $iron_is_idle = sub {
            return ( defined $wi_status && $wi_status == 0 ) ? 1 : 0;
        };
        my $iron_poll_time = time() + $opts{'method-timeout'};

        # Poll until iron is idle or the deadline passes. The status is
        # re-tested after the loop so that an idle result obtained on the
        # very last poll is not reported as a timeout.
        while ( !$iron_is_idle->() && time() < $iron_poll_time ) {
            Tharn::snooze WAFLIRON_POLL_INTERVAL;
            $wi_status = $self->_get_wafliron_status();
        }

        if ( !$iron_is_idle->() ) {
            $Log->exit() if $may_exit;
            NATE::BaseException->throw( "Iron is active on the aggregate :"
                  . $self->aggregate->aggregate() );
        }
    }

    my %wafl_flags;
    if ( defined $opts{nacltask_flags_to_set} ) {
        %wafl_flags = %{ $opts{nacltask_flags_to_set} };
    }

    # Iron won't start if DSC & MDLC flags disabled while LDC flag enabled
    if ( defined $wafl_flags{wafl_iron_lazy_dist_checks}
        && $wafl_flags{wafl_iron_lazy_dist_checks} )
    {
        my $metadata_checks_set =
          ( defined $wafl_flags{wafl_metadata_checks_in_raid}
              && $wafl_flags{wafl_metadata_checks_in_raid} );
        my $defer_summary_disabled =
          ( defined $wafl_flags{wafl_iron_defer_summary_check}
              && !$wafl_flags{wafl_iron_defer_summary_check} );

        if ( $metadata_checks_set || $defer_summary_disabled ) {
            $Log->exit() if $may_exit;
            NATE::BaseException->throw( "Cannot run wafliron as "
                  . "wafl_iron_lazy_dist_checks flag is set and either "
                  . "wafl_metadata_checks_in_raid flag is set or "
                  . "wafl_iron_defer_summary_check flag is disable" );
        }
    }
    $self->_set_flags( wafl_flags => \%wafl_flags );

    # Set wafl scan speed
    my $scan_speed = delete $opts{nacltask_wafl_scan_speed};
    if ( $scan_speed < 0 && $opts{nacltask_wait} ) {
        $Log->warn( "Negative value is provided for nacltask_wafl_scan_speed."
              . " Changing nacltask_wait to 0." );
        $opts{nacltask_wait} = 0;
    }
    $self->_set_scan_speed($scan_speed);

    # Start capturing event logs and reset the corruption marker
    my $log_detector = $self->event_log_object();
    $log_detector->start();
    $self->event_log_object($log_detector);
    $self->corruption_found(0);

    # Check nacltask_optional_commit and nacltask_prev_cp options
    # and start wafliron accordingly.
    # Iron retry option is applicable only for normal wafliron
    my $iron_start_retry = delete $opts{nacltask_iron_start_retry};
    if ( !defined($optional_commit) || $optional_commit =~ m[false]i ) {
        $self->_start_wafliron(
            %opts,
            nacltask_iron_start_retry => $iron_start_retry,
            poll_interval             => WAFLIRON_POLL_INTERVAL * 2
        );
    }
    else {
        my $aggr_object = $self->aggregate();
        $aggr_object =
            $aggr_object->isa('NACL::C::StorageAggregate')
          ? $aggr_object->cast_component_to_task()
          : $aggr_object;

        # Change aggregate state to 'offline'
        $aggr_object->modify(
            'unmount-volumes'    => 'true',
            state                => 'offline',
            nacltask_if_modified => 'pass'
        );

        # Start wafliron with optional-commit true
        if ( $optional_commit =~ m[true]i ) {
            $self->_start_wafliron(
                %opts,
                nacltask_iron_start_retry => $iron_start_retry,
                poll_interval             => WAFLIRON_POLL_INTERVAL * 2
            );
        }
        elsif ( $optional_commit =~ m[prev_cp]i ) {

            # $prev_cp_starter was resolved (and validated) above:
            #   semi_interactive -> _prev_cp_semi_interactive
            #   interactive      -> _prev_cp_interactive
            #   non_interactive  -> _prev_cp_non_interactive
            $self->$prev_cp_starter(%opts);
        } ## end elsif ( $optional_commit...
    } ## end else [ if ( !defined( $optional_commit...

    my $start_timeout                = $opts{'method-timeout'};
    my $endtime                      = time() + $start_timeout;
    my $wafliron_started             = 0;
    my $wafliron_start_message_found = 0;

    while ( time() < $endtime ) {

        # Check wafliron status
        if ( !$wafliron_started ) {

            # Now check if wafliron is in running state
            my $iron_state = $self->_get_wafliron_status();
            if ( $iron_state == 1 ) {

                # Wafliron is in running state, Set $wafliron_started flag
                $Log->debug("Wafliron state is : $iron_state");
                $wafliron_started = 1;
            } ## end if ( $iron_state == 1 )
        } ## end if ( !$wafliron_started)

        # Check messages or messages.log (depending on mode) for
        # wafliron start message
        if ( !$wafliron_started ) {
            Tharn::snooze WAFLIRON_POLL_INTERVAL;
            my @events   = $log_detector->stop( check => 0 );
            my $end_date = $log_detector->event_end_time();

            my @message_logs = $self->_get_matched_logs(
                log_detector           => $log_detector,
                check_for_all_presence => ['wafl.iron.start'],
                event_end_time         => $end_date,
                return_string          => 0
            );
            if ( scalar(@message_logs) ) {
                $wafliron_start_message_found = 1;
            }

            # Any of these messages means iron failed to start
            my @message_errors = $self->_get_matched_logs(
                log_detector           => $log_detector,
                check_for_all_presence => [
                    qr/not enough free space/i,
                    qr/cannot start iron/i,
                    qr/Cannot run wafliron/i,
                    qr/no space left on device/i,
                    qr/aborting iron/i
                ],
                event_end_time => $end_date,
                nacltask_field => "event",
                return_string  => 0
            );
            if ( scalar(@message_errors) ) {
                $Log->exit() if $may_exit;
                NACL::Exceptions::VerifyFailure->throw(
                    "Failed to start wafliron. Found errors: "
                      . Dumper( \@message_errors ) );
            }
        } ## end if ( !$wafliron_started)

        # Break the loop if wafliron start message is found or
        # wafliron state is as expected.
        if ( $wafliron_start_message_found || $wafliron_started ) {
            $self->_wafliron_started(1);
            last;
        }
    } ## end while ( time() < $endtime)

    # If failed to capture the wafliron start message or wafliron state
    # isn't as expected, throw timeout exception.
    if ( !$wafliron_started && !$wafliron_start_message_found ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::Timeout->throw(
            "wafliron did not start within the timeout period:"
              . " $start_timeout" );
    } ## end if ( !$wafliron_started...

    # Depending on the 'nacltask_wait' => 0|1, either wait for
    # wafliron to complete or return
    if ( $opts{nacltask_wait} ) {
        $self->wait_for_completion(
            'method-timeout' => $opts{'method-timeout'} );
    }

    $Log->exit() if $may_exit;
} ## end sub start

=head2 wait_for_completion

    $wafliron_obj->wait_for_completion( 'method-timeout' => $timeout );

(Instance method) This method waits for wafliron to complete on the aggregate specified in wafliron start method. It checks the log file for wafliron complete message and verifies the wafliron status. Also, it checks for corruption and stores boolean value on the instance with the key "corruption_found".

    $wafliron_obj->wait_for_completion( 'method-timeout' => $timeout );
    ...
    # Check if corruption is detected
    if ( $wafliron_obj->corruption_found() ) {
        ...
        $wafliron_obj->review();
        ...
    }

=over

=item Options

=over

=item "method-timeout" => $timeout ( Optional, Default is 2400 )

Throw an exception if the given timeout is exceeded.
=item C<< nacltask_verify=>$nacltask_verify_boolean >>

(Optional) If '0' , verification will not be performed. If '1', (default) verification will be performed to ensure that the messages.log file shows entry for wafliron processes.

=back

=back

=cut

sub wait_for_completion {
    $Log->enter() if $may_enter;
    my $self = shift;

    my %opts = validate_with(
        params => \@_,
        spec   => {
            'method-timeout' =>
              { type => SCALAR, default => LOCAL_WAFLIRON_TIMEOUT },
            nacltask_verify => { type => BOOLEAN, default => 1 }
        }
    );
    my $nacltask_verify = delete $opts{nacltask_verify};

    my $aggregate = $self->aggregate()->aggregate();

    # The aggregate name is matched literally inside the log text, so any
    # regex metacharacters it may contain are escaped once here.
    my $aggregate_re = qr/\Q$aggregate\E/;

    my $log_detector       = $self->event_log_object();
    my $endtime            = time() + $opts{'method-timeout'};
    my $start_msg_verified = 0;
    my $end_msg_verified   = 0;
    my $warning_msg =
        "Did not find wafliron completed message. This "
      . "is expected in rare cases involving a lot of "
      . "wafliron output, e.g. massive corruptions. For "
      . "those cases, this condition can be ignored within"
      . " the test script code. Otherwise, it should be "
      . "considered a product bug";

    # local constants
    Readonly::Scalar my $yes     => 1;
    Readonly::Scalar my $no      => 0;
    Readonly::Scalar my $unknown => -1;

    my $msg_log;
    my $wafliron_state_verified = 0;
    my $wi_status               = undef;
    my $iron_was_running        = $unknown;

    # In case of IOC, after iron completion, aggregate goes
    # to 'unmounting' state, before it becomes offline.
    # During this interim period, if we try to run any command
    # on aggregate, it is failed (InvalidParamValueException),
    # as aggregate is not available. So, we need to catch the
    # InvalidParamValueException and check aggregate state
    # after every 5 secs interval. Max wait time is 60 seconds.
    my $verify_aggr_state = sub {
        my $loop      = 0;
        my $max_count = 12;
        while ( $loop < $max_count ) {
            try {
                my $aggr_state = $self->aggregate()->state->state();
                $Log->debug( "Dumping aggregate state : " . $aggr_state );
                if ( $aggr_state =~ m[iron_restricted|online|offline] ) {
                    $wafliron_state_verified = 1;
                    $Log->debug("Iron state is verified");
                }

                # State was readable: stop retrying whatever it was
                $loop = $max_count;
            }
            catch NACL::APISet::Exceptions::InvalidParamValueException with {
                $Log->debug("Aggregate is not available, wait for 5 seconds");
                Tharn::snooze 5;
                $loop++;
            };
        }    # end while
    };

    # Wait until wafliron complete
    while ( time() < $endtime ) {
        $wi_status = $self->_get_wafliron_status();
        if ( $wi_status == 1 ) {
            $Log->debug("Iron is running");
            if ( $iron_was_running == $unknown ) {
                $Log->debug("We will wait for iron completion");
                $iron_was_running = $yes;
            }
        }
        elsif ( $wi_status == 0 ) {
            if ( $self->_wafliron_started ) {
                $Log->debug("Iron was running");
                $iron_was_running = $yes;
            }
            elsif ( $iron_was_running == $unknown ) {
                $Log->debug("Iron is not running");
                $iron_was_running = $no;
            }
            else {
                $Log->debug("Iron is completed");
                Tharn::snooze 5;
            }

            # verify aggregate state
            $verify_aggr_state->();
            last;
        } ## end elsif ( $wi_status == 0 )
        Tharn::snooze WAFLIRON_POLL_INTERVAL;
    } ## end while ( time() < $endtime)

    my @events   = $log_detector->stop( check => 0 );
    my $end_date = $log_detector->event_end_time();

    # NOTE(review): the timeout exception below is only raised when
    # nacltask_verify is true; with nacltask_verify => 0 a run that is still
    # active at the deadline returns silently. Confirm this is intended.
    if ($nacltask_verify) {

        # If iron was running and it is completed,
        # We need to wait for iron completed message
        # to be logged into messages.log file
        Tharn::sleep 30 if ( $iron_was_running == $yes );

        # Returns 1 when a completion-type event mentioning the aggregate
        # is present in the captured logs, 0 otherwise.
        my $verify_end_msgs = sub {
            $msg_log = $self->_get_matched_logs(
                log_detector           => $log_detector,
                check_for_all_presence => [
                    'wafl.iron.scan.skipped',
                    'callhome.wafl.iron.done',
                    'wafl.iron.completion.times'
                ],
                event_end_time => $end_date,
                return_string  => 1
            );
            return ( defined $msg_log && $msg_log =~ $aggregate_re ) ? 1 : 0;
        };

        if (   ( $iron_was_running == $yes )
            && ( $wafliron_state_verified == 1 )
            && ( $wi_status == 0 ) )
        {
            my $wait_period = WAFLIRON_POLL_INTERVAL;
            $Log->debug("Waiting for iron completion messages");
            while ( time() < $endtime ) {

                # Get wafliron messages
                $end_msg_verified = $verify_end_msgs->();
                if ($end_msg_verified) {
                    last;
                }
                $Log->debug("Sleeping for $wait_period seconds");
                Tharn::snooze $wait_period;
            }    # end while
        }
        elsif ( $iron_was_running == $no ) {

            # In case of iron not running, we won't wait
            # for iron messages to be logged in. We need to
            # verify wafliron complete messages from old log
            $end_msg_verified = $verify_end_msgs->();
        }

        $Log->debug("wafliron_state_verified : $wafliron_state_verified");
        $Log->debug("wi_status : $wi_status");

        # Verify wafliron start messages
        $msg_log = $self->_get_matched_logs(
            log_detector           => $log_detector,
            check_for_all_presence => ['wafl.iron.start'],
            event_end_time         => $end_date,
            return_string          => 1
        );
        if ( defined $msg_log && $msg_log =~ $aggregate_re ) {
            $start_msg_verified = 1;
            logcomment("Iron start messages are verified");
        }
        else {
            logcomment("Iron start messages are not verified");
        }

        # Verify wafliron complete messages
        if ($end_msg_verified) {
            logcomment("Iron completion messages are verified");
        }
        else {
            logcomment("Iron completion messages are not verified");
        }

        # Check for corruption messages
        $self->_check_corruption_msgs();

        #
        # Verify, following
        # - if iron was actually started
        # - was iron already completed
        # - iron start message was found
        #
        if ( ( $iron_was_running == $no ) and ( !$start_msg_verified ) ) {
            $Log->exit() if $may_exit;
            NATE::BaseException->throw("Iron was not started");
        }
        elsif ( ( $iron_was_running == $no ) and $start_msg_verified ) {
            logcomment("Iron run was already completed");
        }
        elsif ( ( $iron_was_running == $yes ) and ( !$start_msg_verified ) ) {
            $Log->warn( "Iron start message was not found."
                  . "It can be a product bug, please check with dev" );
        }

        # Verify, if we received iron completion message
        if ( !$end_msg_verified ) {
            if ( ( $iron_was_running == $yes ) && $wafliron_state_verified ) {
                $Log->warn($warning_msg);
            }
        }

        # Verify, if iron run was completed
        # Or we reached here due to timeout
        if ( ( $wafliron_state_verified == 0 ) and ( $wi_status == 1 ) ) {
            logcomment(
                "Iron did not complete in $opts{'method-timeout'} seconds");
            $Log->exit() if $may_exit;
            NACL::Exceptions::Timeout->throw( "TIMEOUT Error: Iron"
                  . " did not complete in $opts{'method-timeout'} seconds" );
        }
    }

    $Log->exit() if $may_exit;
} ## end sub wait_for_completion

=head2 get_wafliron_messages

    $wafliron_obj->get_wafliron_messages();

(Instance method) This method returns the messages (SCALAR context) generated during wafliron run from messages file. Previous logs will be reset after any of these method call [start(), reject(), commit(), stop()].

Example 1:

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate => $aggregate,
    );
    # Perform any operation here
    ...
    $wafliron_obj->start(%options);
    # This will return the messages generated during wafliron start
    $wafliron_obj->get_wafliron_messages();

Example 2:

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate => $aggregate,
    );
    # Perform any operation here
    ...
$wafliron_obj->start(%options);
    $wafliron_obj->commit();
    # This will return the messages generated during commit() method call only
    $wafliron_obj->get_wafliron_messages();

=cut

sub get_wafliron_messages {
    $Log->enter() if $may_enter;
    my $self = shift;

    # Stop capturing and fetch every event up to the recorded end time
    my $log_detector = $self->event_log_object();
    $log_detector->stop( check => 0 );
    my $end_date = $log_detector->event_end_time();
    my @events   = $log_detector->check(
        check_for_all_presence => [qr/.*/],
        nacltask_field         => "event",
        event_end_time         => $end_date
    );

    # Join the following fields from event log show:
    # time . [messagename:lc(ems-severity)] . event(stripped in
    # the beginning)
    # Convert time from this: 4/21/2015 02:31:49 to Tue Apr 21 2015 02:31:49
    my @messages;
    foreach my $event (@events) {
        my $time = $self->_convert_time_format( old_time => $event->time() );

        # An event without a timestamp contributes an empty time field
        $time = '' if !defined $time;

        my $messagename = $event->messagename();
        my $severity    = lc( $event->ems_severity() );

        # Strip the message name so that a new format can be made
        my $event_text = $event->event();
        $event_text =~ s/\S+:(\s+.*)\s\"$/$1/g;

        push( @messages,
            $time . " [" . $messagename . ":" . $severity . "]" . $event_text );
    }
    my $msg_log = join( "\n", @messages );

    $Log->exit() if $may_exit;
    return $msg_log;
} ## end sub get_wafliron_messages

# _convert_time_format( old_time => $time )
#
# Converts an event timestamp to the "Tue Apr 21 2015 02:31:49" layout.
#   - "M/D/YYYY HH:MM:SS" input is rebuilt from its parts; day and month
#     abbreviations come from DateTime.
#   - An all-digit input is formatted with strftime/localtime (the original
#     comment says ZAPI returns time as Epoch time).
#   - Any other input, including undef, is returned unchanged so the caller
#     never loses the timestamp.
sub _convert_time_format {
    $Log->enter() if $may_enter;
    my ( $pkg, %opts ) = @_;

    my $time     = $opts{old_time};
    my $new_time = $time;

    if ( defined $time ) {
        if ( $time =~ /(\d+)\/(\d+)\/(\d+)\s+(\d+:\d+:\d+)/ ) {
            my ( $month, $day, $year, $time_str ) = ( $1, $2, $3, $4 );
            my $dt = DateTime->new(
                year  => $year,
                month => $month,
                day   => $day,
            );
            $new_time = join( " ",
                $dt->day_abbr, $dt->month_abbr, $day, $year, $time_str );
        }
        elsif ( $time =~ /^\d+$/ ) {

            # ZAPI returns time as Epoch time: "1431578450"
            $new_time = strftime( '%a %b %d %Y %T', localtime($time) );
        }
    }

    $Log->exit() if $may_exit;
    return $new_time;
}

=head2 review

    $wafliron_obj->review();

(Instance method) Run the wafliron review command for the aggregate specified in start method and return the output. Aggregate name set while instantiating the Wafliron MTask Object is being used as the aggregate param for wafliron review. Returns wafliron review messages in scalar context.

Example :

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate => $aggregate
    );
    # Perform any operation here
    ...
    $wafliron_obj->start(%options);
    my $review_messages = $wafliron_obj->review();
    # Perform check here.
    if ( $review_messages =~ m[...] ) {
        ....
    }

=cut

sub review {
    $Log->enter() if $may_enter;
    my $self = shift;

    my %opts = validate_with(
        params => \@_,
        spec   => {
            %{ NACL::C::Component->_common_validate_spec_without_ci() }
        }
    );

    # Delegate to the wafliron component and hand back its raw output
    my $wafliron_component_object = $self->wafliron_component();
    my $response_object = $wafliron_component_object->review(%opts);

    $Log->exit() if $may_exit;
    return $response_object->get_raw_output();
} ## end sub review

=head2 commit

    $wafliron_obj->commit( nacltask_force => 1 );

(Instance method) This function runs the wafliron commit command. Aggregate name set while instantiating the Wafliron MTask Object is being used as the aggregate param for wafliron commit.

Example :

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate => $aggregate,
    );
    # Perform any operation here
    ...
    $wafliron_obj->start(%options);
    my $review_messages = $wafliron_obj->review();
    # Perform check here.
    if ( $review_messages =~ m[...] ) {
        ....
}

    # Commit the wafliron changes
    $wafliron_obj->commit( nacltask_force => 1 );

=over

=item Options

=over

=item "nacltask_force" => 0 | 1

( Optional, Default is 1 ) This enables the commit with force.

=back

=back

=cut

# Issues the mode-specific wafliron commit command, answering its prompts
# with "Y" (nacltask_force == 1) or "N", then polls until both of these hold
# or WAFLIRON_COMMIT_TIMEOUT seconds have elapsed:
#   * _get_wafliron_status() is false and the aggregate state matches
#     offline|online
#   * the matched event log text mentions the aggregate name
# Throws NACL::Exceptions::VerifyFailure if either condition is not met.
sub commit {
    $Log->enter() if $may_enter;
    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => {
            nacltask_force => { type => BOOLEAN, default => 1 },
            %{ NACL::C::Component->_common_validate_spec_without_ci() }
        },
        allow_extra => 1
    );

    my $aggregate = $self->aggregate()->aggregate();
    my $force     = $opts{nacltask_force};
    my $apiset    = $self->filer_api();

    # Kept as a string so it can always be matched and interpolated safely.
    my $msg_log             = q{};
    my $messages_found      = 0;
    my $aggr_state_verified = 0;

    # Check if method-timeout is defined. If not, assign default value
    if ( !defined $opts{'method-timeout'} || $opts{'method-timeout'} == 0 ) {
        $opts{'method-timeout'} = WAFLIRON_COMMIT_TIMEOUT;
    }

    # Decide commit cmd based on the mode
    my $cmd =
        $self->_mode() eq 'CMode'
        ? "storage_aggregate_wafliron_commit"
        : "aggr_wafliron_commit";

    # Start capturing logs
    #$self->system_log_object->start();
    $self->event_log_object->start();

    my $answer = ( $force == 1 ) ? "Y" : "N";
    my $sm_change =
        qr/Committing\sthese\schanges\swill\sbreak\sthe\ssynchronization\sbetween\sthe\svolumes/i;
    my $commit_changes =
        qr/Commit\schanges\sfor\s(aggregate|volume)\s.*\sto\sdisk\?/i;

    # Prompt => answer pairs; the patterns are passed in stringified form.
    my @prompts_answers = (
        "$sm_change"      => "$answer",
        "$commit_changes" => "$answer",
    );

    $apiset->$cmd(
        aggregate                => $aggregate,
        'connectrec-match_table' => \@prompts_answers,
        'connectrec-timeout'     => $opts{'method-timeout'},
    );

    # NOTE(review): the polling window uses the constant, not the
    # 'method-timeout' option - confirm this is intended.
    my $commit_end_time = time() + WAFLIRON_COMMIT_TIMEOUT;

    # Wait until commit is done or timeout
    while ( time() < $commit_end_time ) {

        # Going to check wafliron and aggregate state
        if ( !$aggr_state_verified ) {
            if ( !$self->_get_wafliron_status()
                && $self->aggregate()->get_one_state_attribute('state') =~
                m[offline|online] )
            {
                $aggr_state_verified = 1;
            }    ## end if ( !$self->_get_wafliron_status...
        }    ## end if ( !$aggr_state_verified)

        Tharn::snooze(30);

        # Check commit messages
        if ( !$messages_found ) {
            $msg_log = $self->_get_matched_logs(
                log_detector           => $self->event_log_object,
                check_for_all_presence => [
                    'wafl.iron.oc.deletedChangeLog',
                    'wafl.iron.oc.committedChangeLog'
                ],
                event_end_time => $commit_end_time,
                return_string  => 1
            );
            $msg_log = q{} if !defined $msg_log;

            # \Q..\E: match the aggregate name literally, not as a pattern.
            if ( $msg_log =~ /\Q$aggregate\E/ ) {
                $Log->debug("Wafliron commit messages are verified");
                $messages_found = 1;
            }
        }    ## end if ( !$messages_found )

        last if ( $aggr_state_verified && $messages_found );
    }    ## end while ( time() < $commit_end_time)

    if ( !$messages_found ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::VerifyFailure->throw(
            "Required commit messages are not found,\n messages: $msg_log" );
    }    ## end if ( !$messages_found )

    if ( !$aggr_state_verified ) {
        $Log->exit() if $may_exit;

        # The check above accepts offline or online; the message text is
        # left unchanged for callers that may match on it.
        NACL::Exceptions::VerifyFailure->throw(
            "Aggregate state is not offline after commit");
    }

    $Log->exit() if $may_exit;
}    ## end sub commit

=head2 reject

    $wafliron_obj->reject();

(Instance method) This function runs the wafliron reject command. Aggregate
name set while instantiating the Wafliron MTask Object is being used as the
aggregate param for wafliron reject.

Example :

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate         => $aggregate,
    );

    # Perform any operation here
    ...
    $wafliron_obj->start(%options);
    my $review_messages = $wafliron_obj->review();

    # Perform check here.
    if ( $review_messages =~ m[...] ) {
        ....
}

    # Reject the wafliron changes
    $wafliron_obj->reject();

=cut

# Issues wafliron reject through the wafliron component, then polls every
# WAFLIRON_POLL_INTERVAL seconds, for at most LOCAL_WAFLIRON_TIMEOUT seconds,
# until both of these hold:
#   * _get_wafliron_status() == 0
#   * the matched event log text contains a reject message and the
#     "Deleted change log files" message for this aggregate
# Throws NACL::Exceptions::VerifyFailure if either condition is not met.
sub reject {
    $Log->enter() if $may_enter;
    my $self = shift;

    my $aggregate         = $self->aggregate()->aggregate();
    my $wafliron_c_object = $self->wafliron_component();

    # \Q..\E: the aggregate name is matched literally, not as a pattern.
    # wafl.iron.oc.rejectChangeLog
    my $reject_msg1 =
        qr/Rejecting\s+change\s+log\s+files\s+for\s+\w+\s+\Q$aggregate\E/i;

    # wafl.iron.oc.root.rejectedChanges
    my $reject_msg2 = qr/Rejected\schanges\sfor\saggregate/i;

    # wafl.iron.oc.deletedChangeLog
    my $log_del_msg =
        qr/Deleted\s+change\s+log\s+files\s+for\s+aggregate\s+\Q$aggregate\E.*/i;

    # Start capturing logs
    $self->event_log_object->start();

    $wafliron_c_object->reject();

    my $end_time          = time() + LOCAL_WAFLIRON_TIMEOUT;
    my $wafl_state_proper = 0;
    my $reject_msgs_found = 0;

    while ( time() < $end_time ) {

        # Check wafliron state
        if ( !$wafl_state_proper ) {
            if ( $self->_get_wafliron_status() == 0 ) {
                $wafl_state_proper = 1;
            }
        }

        Tharn::snooze(WAFLIRON_POLL_INTERVAL);

        # Check wafliron reject messages
        if ( !$reject_msgs_found ) {
            my $msg_log = $self->_get_matched_logs(
                log_detector           => $self->event_log_object,
                check_for_all_presence => [
                    'wafl.iron.oc.deletedChangeLog',
                    'wafl.iron.oc.rejectChangeLog',
                    'wafl.iron.oc.root.rejectedChanges'
                ],
                event_end_time => $end_time,
                return_string  => 1
            );

            # Avoid matching against undef when nothing came back.
            $msg_log = q{} if !defined $msg_log;

            if (   $msg_log =~ /$reject_msg1|$reject_msg2/
                && $msg_log =~ /$log_del_msg/ )
            {
                $reject_msgs_found = 1;
            }
        }    ## end if ( !$reject_msgs_found)

        last if ( $wafl_state_proper && $reject_msgs_found );
    }    ## end while ( time() < $end_time)

    if ( !$wafl_state_proper ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::VerifyFailure->throw(
            "Wafliron state is not proper after reject");
    }

    if ( !$reject_msgs_found ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::VerifyFailure->throw(
            "Failed to get reject message");
    }

    $Log->exit() if $may_exit;
}    ## end sub reject

=head2 stop

    $wafliron_obj->stop();

(Instance method) This function runs the wafliron stop command.
Aggregate name set while instantiating the Wafliron MTask Object is being
used as the aggregate param for wafliron stop.

Example :

    # Start test
    use NACL::MTask::Wafliron;
    ...
    my $wafliron_obj = NACL::MTask::Wafliron->new(
        command_interface => $command_interface,
        aggregate         => $aggregate,
    );

    # Perform any operation here
    ...
    $wafliron_obj->start(%options);
    my $review_messages = $wafliron_obj->review();

    # Perform check here.
    if ( $review_messages =~ m[...] ) {
        ....
    }

    # Abort wafliron
    $wafliron_obj->stop();

=cut

# Issues wafliron stop through the wafliron component, then polls every
# WAFLIRON_POLL_INTERVAL seconds, for at most LOCAL_WAFLIRON_TIMEOUT seconds,
# until both of these hold:
#   * _get_wafliron_status() == 0
#   * a 'wafl.scan.iron.done' event mentioning this aggregate is found
# An event log detector is created (and stored on the object) when the object
# does not have one yet.
# Throws NACL::Exceptions::VerifyFailure if either condition is not met.
sub stop {
    $Log->enter() if $may_enter;
    my $self = shift;

    my $aggregate         = $self->aggregate()->aggregate();
    my $wafliron_c_object = $self->wafliron_component();

    # Start capturing logs
    my %event_opts;
    $event_opts{node} = $self->node() if ( defined $self->node() );

    if ( !defined $self->event_log_object ) {
        my $log_detector = NACL::MTask::EventLogDetector->new(
            command_interface => $self->command_interface(),
            %event_opts,
        );
        $self->event_log_object($log_detector);
    }
    $self->event_log_object->start();

    # Issue iron stop command
    $wafliron_c_object->stop();

    # Start Polling
    my $end_time          = time() + LOCAL_WAFLIRON_TIMEOUT;
    my $wafl_state_proper = 0;
    my $stop_msgs_found   = 0;

    while ( time() < $end_time ) {

        # Check wafliron state
        if ( !$wafl_state_proper ) {
            if ( $self->_get_wafliron_status() == 0 ) {
                $wafl_state_proper = 1;
            }
        }

        Tharn::snooze(WAFLIRON_POLL_INTERVAL);

        # Check wafliron stop message
        if ( !$stop_msgs_found ) {

            # The return value is not used; the assignment keeps the call in
            # list context exactly as it was written.
            # NOTE(review): the detector is stopped on every poll without
            # being restarted - confirm later polls still see new events.
            my @events   = $self->event_log_object->stop( check => 0 );
            my $end_date = $self->event_log_object->event_end_time();

            my @messages_obj = $self->_get_matched_logs(
                log_detector           => $self->event_log_object,
                check_for_all_presence => ['wafl.scan.iron.done'],
                event_end_time         => $end_date,
                return_string          => 0
            );

            foreach my $obj (@messages_obj) {

                # \Q..\E: match the aggregate name literally.
                if ( $obj->event() =~ /\Q$aggregate\E/ ) {
                    $stop_msgs_found = 1;
                    last;
                }
            }
        }    ## end if ( !$stop_msgs_found)

        last if ( $wafl_state_proper && $stop_msgs_found );
    }    ## end while ( time() < $end_time)

    if ( !$wafl_state_proper ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::VerifyFailure->throw(
            "Wafliron state is not proper after iron stop/abort");
    }

    if ( !$stop_msgs_found ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::VerifyFailure->throw(
            "Failed to verify wafliron stop EMS message");
    }

    $Log->exit() if $may_exit;
}    ## end sub stop

=head2 start_on_multiple_aggr

    NACL::MTask::Wafliron->start_on_multiple_aggr(
        command_interface => $command_interface,
        aggregate         => \@aggregate_object,
        ...
    );

(Class method) This function runs the wafliron start command in parallel, so
that wafliron runs concurrently on multiple aggregates. The two mandatory
parameters are command_interface and an array reference of aggregates. The
array may hold aggregate names or aggregate component/task objects.
Additional parameters can be passed in as required for running wafliron on
the aggregates. They are the same as the ones accepted by
$wafliron_obj->start(), because that method is called in parallel internally.

=over

=item "command_interface" => $command_interface

( Required, isa NACL::C::CommandInterface::ONTAP )

A component object that represents the host to which to send commands.
See L<NACL::C::CommandInterface::ONTAP>

=item "aggregate" => \@aggregate_obj

( Required, ARRAYREF )

Aggregate component or Aggregate task object array or an Aggregate name array
on which we want to start the wafliron.

=item "others"

(Optional) The other items are the same key, value pairs accepted by the
start method above, as this function calls the start method of wafliron in
parallel to achieve the results.

=back

Example :

    # Start test
    use NACL::MTask::Wafliron;
    ...
    #create an array of aggregate/aggregate objects
    ...
    NACL::MTask::Wafliron->start_on_multiple_aggr(
        command_interface => $command_interface,
        aggregate         => \@aggregate_object,
        ...
);

=cut

# Builds one NACL::MTask::Wafliron object per distinct entry of the
# "aggregate" array, runs start() on each of them as a separate NATE::Process
# through a parallel NATE::ProcManager, waits for all of them and throws
# NATE::BaseException when the manager's worst result is a failure.
# Options other than command_interface and aggregate are handed to start().
sub start_on_multiple_aggr {
    $Log->enter() if $may_enter;

    # Invocant is not used: objects are always built as NACL::MTask::Wafliron.
    my $class = shift;
    my %opts  = validate_with(
        params => \@_,
        spec   => {
            command_interface =>
                { isa => 'NACL::C::CommandInterface::ONTAP' },
            aggregate => { type => ARRAYREF },
            %{ NACL::C::Component->_common_validate_spec_without_ci() }
        },
        allow_extra => 1
    );

    # Remove the two options consumed here; what is left goes to start().
    my $aggr_ref          = delete( $opts{aggregate} );
    my $command_interface = delete( $opts{command_interface} );

    # Run wafliron only once per distinct aggregate entry.
    my @aggr = uniq( @{$aggr_ref} );

    my @wafliron_obj = map {
        NACL::MTask::Wafliron->new(
            command_interface => $command_interface,
            aggregate         => $_,
        )
    } @aggr;

    # One process per wafliron object, with run ids Wafliron_1, Wafliron_2, ...
    my $counter = 1;
    my @proc_arr;
    for my $wafliron_obj (@wafliron_obj) {
        my $proc = NATE::Process->new(
            codespec => \&start,
            args     => [ $wafliron_obj, %opts ],
            runid    => "Wafliron_" . $counter,
            onexit   => \&NATE::Process::on_exit_propagate_worst_result
        );
        $counter++;
        push( @proc_arr, $proc );
    }

    my $procmgr = NATE::ProcManager->new(
        proc_info  => \@proc_arr,
        background => 1,
        parallel   => 1,
    );
    $procmgr->start;
    $procmgr->wait;

    my $result = NATE::Result->is_failure( $procmgr->worst_result );
    if ($result) {
        $Log->exit() if $may_exit;
        NATE::BaseException->throw("Failed to perform wafliron");
    }

    $Log->exit() if $may_exit;
}    ## end sub start_on_multiple_aggr

1;