##  Copyright (c) 2017 NetApp, Inc., All Rights Reserved
##  Any use, modification, or distribution is prohibited
##  without prior written consent from NetApp, Inc.
##
## @summary Automated NDU Framework
## @author lkashyap@netapp.com
## @status review
## @pod here

package NACL::STask::ClusterImage;

use strict;
use warnings;

use base qw(
  NACL::C::ClusterImage
  NACL::C::ClusterImagePackage
  NACL::STask::STask
);

use constant DEFAULT_TIMEOUT => 4200;

use Hostrec qw();
use Params::Validate qw(validate_with validate :types );
use Data::Dumper;

use NATE::Log qw(log_global);
use NATE::BaseException qw(:try);
use NATE::Time qw(timeout2time);
use NACL::APISet;
use NACL::APISet::Exceptions::CommandFailedException;
use NACL::C::Node;
use NACL::C::ClusterImage;
use NACL::C::ClusterImagePackage;
use NACL::C::Metrocluster;
use NACL::CS::NetworkInterface;
use NACL::CS::MetroclusterNode;
use NACL::MTask::MCC::Metrocluster;
use NACL::STask::Node;
use NACL::C::Cluster;
use NACL::C::SystemNodeImage;
use NACL::GeneralUtils qw(random_name_generator);

# NOTE(review): the following names are used below but are not loaded by this
# file: NACL::Tools::httpd::control, NACL::C::ClusterHa,
# NACL::STask::NetworkInterface, NACL::CS::Cluster, NACL::Exceptions::Timeout,
# NACL::Exceptions::NoElementsFound,
# NACL::C::Exceptions::ClusterImage::VerifySignaturesImage and Tharn.
# Presumably they are loaded by one of the modules above -- confirm.

my $Log       = log_global();
my $may_enter = $Log->may_enter();
my $may_exit  = $Log->may_exit();

# Error texts that the progress monitors treat as a transient loss of the
# management connection: the error is logged, the command interface is
# refreshed and the poll is retried.
my @ANDU_TRANSIENT_ERROR_PATTERNS = (
    qr/can not write/i,
    qr/Unexpected EOF while reading output from command/i,
    qr/Timeout waiting for command to complete/i,
    qr/encountered an error/i,
    qr/Timeout has occured.Prompts will be resynced/i,
    qr/There are no entries matching your query/,
    qr/No matching cluster image\(s\) found/,
    qr/Connection timed out/,
    qr/Could not create connection/,
    qr/reached the timeout/,
);

=head1 NAME

NACL::STask::ClusterImage

=head1 SYNOPSIS

    # Example
    my $Andu_Obj = NACL::STask::ClusterImage->update(
        command_interface   => $Cserver,
        'stabilize-minutes' => $Stabilize_mins,
        'url'               => $Package_url,
    );

=head1 DESCRIPTION

This task performs the update/upgrade of clusters using the automated
nondisruptive update (ANDU) feature.

C<NACL::STask::ClusterImage> is a derived class of L<NACL::C::ClusterImage>.
It is a task to perform an ONTAP ClusterImage Update/Upgrade given either a
URL, a package, or the relevant parts of the path that would be used by this
task to derive a package e.g the ONTAP directory path, build flags and
architecture name

=head1 ATTRIBUTES

=head2 command_interface

=head2 url

=head1 METHODS

=head2 update

    my $Andu_Obj = NACL::STask::ClusterImage->update(
        command_interface   => $Cserver,
        'stabilize-minutes' => $Stabilize_mins,
        'url'               => $Package_url,
    );

or

    my $Andu_Obj = NACL::STask::ClusterImage->update(
        command_interface   => $Cserver,
        'stabilize-minutes' => $Stabilize_mins,
        arch                => 'x86_64',
        build_flags         => "debug domain sim",
        ontap_dir           => $ontap_dir,
    );

All the nodes in the cluster will be updated/upgraded and this task will
refresh the command_interface following node reboot as part of the upgrade
process. The method returns the invocant (C<$pkg>).

=over

=item Options

=over

=item C<< command_interface => $ci >>

(Required) The command interface on which the cluster image commands are run.

=item C<< url => $url >>

(Required if ontap_dir, build_flags and arch are not provided)
The full url path of package in either of these formats, e.g.:
"http://web.netapp.com/engineering/x/eng/rlse/DOT/RfullsteamN/final/bedrock/export/x86.debug/tarball/image.tgz"
or
"/x/eng/rlse/DOT/RfullsteamN/final/bedrock/export/x86.debug/tarball/image.tgz".

=item C<< url1 => $url1 >>

if LTS double hop upgrade need to specify the url1 and url2
"http://web.netapp.com/engineering/x/eng/rlse/DOT/RDayBlazzer/final/bedrock/export/x86.debug/tarball/image.tgz"

=item C<< url2 => $url2 >>

if LTS double hop upgrade need to specify the url1 and url2
"http://web.netapp.com/engineering/x/eng/rlse/DOT/RBluePaddle/final/bedrock/export/x86.debug/tarball/image.tgz"

=item C<< ontap_dir => $ot_dir >>

(Optional) The ONTAP directory path, e.g.
/x/eng/rlse/DOT/RfullsteamN/final/bedrock/export, if "url" is not provided.

=item C<< build_flags => $flags >>

(Optional) The build flags as a whitespace-separated string, e.g. debug, sim,
gcov, etc., if "url" is not provided. Defaults to "debug".

=item C<< arch => $architecture >>

(Optional) The architecture type, e.g. pc_elf, x86-64 etc., if "url" is not
provided. Defaults to "x86_64".

=item C<< nodes => $csv >>

(Optional) Comma-separated list of node names that is passed on to the
update command.

=item C<< web_host => xyz >>

(Optional) The hostname of the server to run an http_server on, defaults to
"localhost"

=item C<< stabilize-minutes => 2 >>

(Optional) Specifies the number of minutes that the update should wait after
a takeover or giveback is completed. This allows time for the clients to
recover from the pause in I/O that occurs during takeover and give-back.

=item C<< 'method-timeout' => $timeout_value_in_seconds >>

(Optional) Default to 4200 seconds
How long to wait for the completion of job started by 'update' operation.

=item C<< 'force-rolling' => false|true >>

(Optional) Default to false i.e. performs batch upgrade
This option is used for clusters with eight or more nodes to specify that a
rolling update (one HA pair at a time) should be done.

=item C<< 'wait_for_completion' => 1|0 >>

(Optional) Defaults to 1 i.e the update method does not return after starting
the update. The update progress will be monitored for completion.
Used for QA testing
If set to 0, the update method starts the update and returns immediately,
it does not monitor the update after starting it.

=item C<< 'mcc' => 1|0 >>

(Optional) Defaults to 0 i.e ONTAP update is on HA pair
If set to 1, the update is on 4/8 pack MCC

=item C<< 'is_lts_double_hop' => 1|0 >>

(Optional) Defaults to 0 i.e lts double hop update is on HA pair
If set to 1, updates double hop

=back

=item Exceptions

=over

=item C<NATE::BaseException>

This type of exception is thrown when essential parameters are not defined
or has null value

=back

=back

=cut

sub update {
    $Log->enter() if $may_enter;
    my $pkg  = shift;
    my %opts = $pkg->_common_validate_with(
        params          => \@_,
        additional_spec => {
            url       => { type => SCALAR | UNDEF, optional => 1 },
            mcc       => { type => BOOLEAN, optional => 1, default => 0 },
            url1      => { type => SCALAR | UNDEF, optional => 1 },
            url2      => { type => SCALAR | UNDEF, optional => 1 },
            nodes     => { type => SCALAR | UNDEF, optional => 1 },
            web_host  => { type => SCALAR | UNDEF, optional => 1 },
            ontap_dir => { type => SCALAR | UNDEF, optional => 1 },
            is_lts_double_hop =>
              { type => BOOLEAN, optional => 1, default => 0 },
            'method-timeout' => { type => SCALAR, optional => 1 },
            arch             => {
                type     => SCALAR | UNDEF,
                optional => 1,
                default  => 'x86_64'
            },
            build_flags => {
                type     => SCALAR | UNDEF,
                optional => 1,
                default  => 'debug'
            },
            wait_for_completion =>
              { type => BOOLEAN, optional => 1, default => 1 },
        },
        ignore_primary_keys => 1,
        allow_extra         => 1,
    );

    my $mcc                 = delete $opts{mcc};
    my $url1                = delete $opts{url1};
    my $url2                = delete $opts{url2};
    my $is_lts_double_hop   = delete $opts{is_lts_double_hop};
    my $wait_for_completion = delete $opts{wait_for_completion};

    if ( $is_lts_double_hop == 1 ) {
        NATE::BaseException->throw('Specify URL1 and URL2')
          if ( !defined $url1 || !defined $url2 );
    }

    # Split the nodes/build_flag options in csv format to arrayref
    if ( defined( $opts{nodes} ) ) {
        $opts{nodes} = [ split( /,/, $opts{nodes} ) ];
    }
    if ( defined( $opts{build_flags} ) ) {
        $opts{build_flags} = [ split( /\s/, $opts{build_flags} ) ];
    }

    my %update_params;
    $pkg->_hash_move(
        source => \%opts,
        target => \%update_params,
        move   => [ 'force-rolling', 'nodes', 'stabilize-minutes', ],
    );
    foreach my $param ( keys(%update_params) ) {
        delete $update_params{$param} if ( !defined $update_params{$param} );
    }

    # Without an explicit url the package path is derived from
    # ontap_dir/arch/build_flags, so all three must be usable.
    if ( !defined( $opts{url} ) ) {
        my @errors;
        if ( length( _trim( $opts{ontap_dir} ) ) == 0 ) {
            push @errors, "'ontap_dir' has a empty string for a value";
        }

        # build_flags is an array ref at this point (or undef); it is "empty"
        # when it holds no non-blank flag.
        my @flags =
          defined $opts{build_flags}
          ? grep { length( _trim($_) ) } @{ $opts{build_flags} }
          : ();
        if ( !@flags ) {
            push @errors, "'build_flags' has a empty string for a value";
        }
        if ( length( _trim( $opts{arch} ) ) == 0 ) {
            push @errors, "'arch' has a empty string for a value";
        }
        if (@errors) {
            $Log->exit() if $may_exit;
            NATE::BaseException->throw( join( '; ', @errors ) );
        }
        $opts{url} = _guess_image_path(
            {
                ontap_dir   => $opts{ontap_dir},
                build_flags => $opts{build_flags},
                arch        => $opts{arch},
            }
        );
    }
    $Log->debug( 'Package : ' . $opts{url} );

    # A package that is not already served over ftp/http is published through
    # a local http server so that the cluster can download it.
    my $client = $opts{web_host} || Hostrec->new( id => "localhost" );
    my $httpd = NACL::Tools::httpd::control->new();
    if ( $opts{url} !~ m@^(ftp|http)://@ ) {
        my $basename =
          substr( $opts{url}, ( rindex( $opts{url}, '/' ) + 1 ) );
        my $url = $httpd->start(
            hostrec => $client,
            peer    => $opts{command_interface}->hostrec()->default_ip,
            gets    => { "/$basename" => $opts{url} },
        );
        $opts{url} = $url . "$basename";
    }

    $opts{'method-timeout'} ||= DEFAULT_TIMEOUT;
    delete $opts{ontap_dir};
    delete $opts{build_flags};
    delete $opts{arch};
    delete $opts{web_host};

    my @nodes = NACL::STask::Node->find();

    # Set AUTOBOOT arg
    _set_AUTOBOOT_bootarg( [@nodes] );

    # Revert lifs to Home node
    _revert_lifs_to_home_node( [@nodes] );

    # Delete root volume snapshots
    _delete_root_vol_snapshots( [@nodes] );

    my ( $dr_partner_obj, $dr_parent_obj, $version, $is_2node_mcc );
    if ($mcc) {
        my @metroclusternodes = NACL::CS::MetroclusterNode->fetch(
            command_interface => $nodes[0] );
        my $metroclusternodes = $metroclusternodes[0];
        $Log->comment( 'Local Site and Remote site nodes in MCC are '
              . $metroclusternodes->node . ' and '
              . $metroclusternodes->dr_partner );
        $dr_parent_obj =
          NACL::C::Node->new( node => $metroclusternodes->node );
        $dr_partner_obj =
          NACL::C::Node->new( node => $metroclusternodes->dr_partner );

        my @mcc_node_names = map { $_->name() } @nodes;
        my $mcc_config_obj = NACL::MTask::MCC::Metrocluster->new(
            nodes => [@mcc_node_names], );
        $is_2node_mcc = $mcc_config_obj->is_2_node();
        $Log->comment( 'Is 2-node MCC: ' . $is_2node_mcc );

        # Delete the 'force-rolling' and 'stabilize-minutes' as 2nodeMCC
        # is not supported.
        if ( $is_2node_mcc == 1 ) {
            delete $update_params{'force-rolling'};
            delete $update_params{'stabilize-minutes'};
        }

        # Retrieve and delete the existing packages if any
        foreach my $inter ( $dr_parent_obj, $dr_partner_obj ) {
            _andu_delete_existing_packages( $pkg, $inter );
        }

        my $cserver_command_interface = $opts{command_interface};
        foreach my $inter ( $dr_parent_obj, $dr_partner_obj ) {
            $Log->step( 'Get the package on the node ' . $inter->name() );
            $opts{command_interface} = $inter;
            $pkg->command_interface( $opts{command_interface} );

            # Download a new package
            $pkg->SUPER::get( command_interface => $inter, %opts, );
        }

        # Allow package synchronization to complete
        Tharn::snooze(180);
        $opts{command_interface} = $cserver_command_interface;
        $pkg->command_interface( $opts{command_interface} );

        foreach my $inter ( $dr_parent_obj, $dr_partner_obj ) {

            # Get the package version before starting the update
            $version = $pkg->SUPER::show_repository(
                command_interface => $inter,
                requested_fields  => ['download-ver'],
            );

            # Run cluster image validate on the remote site
            $Log->step( 'Run cluster image validate on ' . $inter->name() );

            # Using cluster image validate raw command due to
            # burt 1031746 and 781803
            $inter->apiset()
              ->execute_raw_command( command => "set -showallfields false" );
            $pkg->SUPER::validate(
                command_interface => $inter,
                filter            => { version => $version->download_ver() },
                allow_empty       => 1,
            );
            $inter->apiset()
              ->execute_raw_command( command => "set -showallfields true" );
        }
    }
    else {

        # Retrieve and delete the existing packages if any
        _andu_delete_existing_packages( $pkg, $opts{command_interface} );

        # Download a new package
        if ( $is_lts_double_hop == 1 ) {
            foreach my $url ( $url1, $url2 ) {
                $opts{url} = $url;
                $pkg->SUPER::get(
                    command_interface => $opts{command_interface},
                    %opts,
                );
            }
        }
        else {
            $pkg->SUPER::get(
                command_interface => $opts{command_interface},
                %opts,
            );
        }

        # Get the package version before starting the update
        $version = $pkg->SUPER::show_repository(
            command_interface => $opts{command_interface},
            requested_fields  => ['download-ver'],
        );

        # Allow package synchronization to complete
        Tharn::snooze(180);
    }

    # Begin the update
    my $source_pem_path                 = "/var/etc/";
    my $tmp_pem_path = "/tmp/" . random_name_generator() . "/";
    my $cleanup_promo_nonpromo_workarnd = 0;
    my $update_count                    = 2;
    my $retry_count                     = 0;

    # Puts the backed-up production certificates back in place and removes
    # the temporary backup directory (undoes the signature workaround below).
    my $restore_production_pems = sub {
        my $Client_apiset = $nodes[0]->get_systemshell_apiset();
        $Client_apiset->execute_raw_command( command =>
              "sudo cp $tmp_pem_path*production*.pem $source_pem_path" );
        $Client_apiset->rm(
            recursive => 1,
            force     => 1,
            paths     => $tmp_pem_path
        );
    };

  UPDATE:
    while (1) {

        # Set by the signature-verification handler to request one more try.
        my $retry_after_signature_fix = 0;

        try {
            $pkg->SUPER::update(
                %update_params,
                command_interface           => $opts{command_interface},
                version                     => $version->download_ver(),
                'ignore-validation-warning' => 'true',
                'skip-confirmation'         => 'true',
                'pause-after'               => 'none',
                'use-batch-update'          => 'y',
                'method-timeout'            => 150,
                'skip-validation'           => 'false',
            );
            $update_count++;

            # A try block is a closure: this only leaves the block. The
            # method itself returns $pkg after the loop.
            return if ( $wait_for_completion == 0 );

            # Wait for update completion
            if ($is_2node_mcc) {
                $pkg->_monitor_phase_status_2node_mcc(%opts);
            }
            elsif ($is_lts_double_hop) {
                $pkg->_wait_for_completion_double_hop(%opts);
            }
            else {
                $pkg->wait_for_completion(%opts);
            }
            $restore_production_pems->()
              if ($cleanup_promo_nonpromo_workarnd);
        }
        catch NACL::C::Exceptions::ClusterImage::VerifySignaturesImage with {
            my $ex = shift;

            # Only the first signature failure is worked around and retried.
            if ( $retry_count++ == 0 ) {
                if ( $opts{command_interface}->get_version_manager()
                    ->is_promoted() )
                {
                    my $Client_apiset = $nodes[0]->get_systemshell_apiset();

                    # create timestamped temporary directory
                    $Client_apiset->mkdir( paths => $tmp_pem_path );

                    # list down all .pem files at /var/etc/ path
                    $Client_apiset->ls(
                        'one-per-line' => 1,
                        paths          => $source_pem_path . "*.pem"
                    );
                    $Client_apiset->ls(
                        paths => $source_pem_path . "*production*.pem" );
                    $Client_apiset->execute_raw_command( command =>
"sudo cp $source_pem_path*production*.pem $tmp_pem_path"
                    );
                    my @all_dev_files = @{
                        $Client_apiset->ls(
                            paths => $source_pem_path . "*development*.pem"
                        )->get_parsed_output()->[0]{'files'}
                    };
                    $Client_apiset->execute_raw_command( command =>
"sudo cp $source_pem_path*development*.pem $tmp_pem_path"
                    );

                    # Install every development certificate under the
                    # corresponding production file name.
                    foreach my $source_file (@all_dev_files) {
                        $source_file =~ s/$source_pem_path//g;
                        my $dest_file = $source_file;
                        $dest_file =~ s/-development/-production/g;
                        $Client_apiset->execute_raw_command( command =>
"sudo cp $tmp_pem_path$source_file $source_pem_path$dest_file"
                        );
                    }
                    $cleanup_promo_nonpromo_workarnd = 1;
                }
                else {
                    my @nodes_in_cluster = NACL::CS::Cluster->fetch(
                        command_interface => $opts{command_interface}, );
                    foreach my $cluster_node (@nodes_in_cluster) {
                        NACL::C::SystemNodeImage->dev_promoted_update(
                            node              => $cluster_node->node(),
                            command_interface => $opts{command_interface},
                        );
                    }
                }
                $retry_after_signature_fix = 1;
                return;
            }
            $restore_production_pems->()
              if ($cleanup_promo_nonpromo_workarnd);
            $ex->throw();
        }
        catch NACL::APISet::Exceptions::CommandFailedException with {
            my $ex = shift;

            # Disable capabilities if ANDU downgrade
            if ( $ex->text() =~ /One or more capabilities which are/i ) {
                $pkg->_disable_capabilities(
                    %opts,
                    command_interface => $opts{command_interface},
                    comments          => $ex->text(),
                );
                if ( !--$update_count ) {
                    $ex->throw( 'Failed to disable capabilities.',
                        $ex->text() );
                }
            }
            else {
                $ex->throw( 'ANDU Failed: ', $ex->text() );
            }
        };

        next UPDATE if ($retry_after_signature_fix);

        # One attempt is left after capabilities were disabled.
        next UPDATE if ( $update_count == 1 );
        last UPDATE;
    }

    $Log->exit() if $may_exit;
    return $pkg;
}    ## end sub update

# Removes every package currently held in the cluster image repository that
# is reachable through $command_interface. An empty repository is not an
# error; any other exception is logged and rethrown unchanged.
sub _andu_delete_existing_packages {
    my ( $pkg, $command_interface ) = @_;

    try {
        my @repo = $pkg->SUPER::show_repository(
            command_interface => $command_interface, );
        foreach my $package (@repo) {
            $pkg->SUPER::delete(
                command_interface => $command_interface,
                version           => $package->download_ver()
            );
        }
    }
    catch NATE::BaseException with {
        my $ex = shift;
        if ( $ex->text() =~ /No matching cluster image package/ ) {

            # cluster repository is empty
            $Log->comment('Cluster image repository is empty');
        }
        else {
            $Log->comment( 'Exception Caught: ' . $ex->text() );
            $ex->throw();
        }
    };
    return;
}    ## end sub _andu_delete_existing_packages

# Returns 1 when $text matches one of @ANDU_TRANSIENT_ERROR_PATTERNS,
# 0 otherwise (including for undef).
sub _andu_is_transient_error {
    my $text = shift;

    return 0 if ( !defined $text );
    foreach my $pattern (@ANDU_TRANSIENT_ERROR_PATTERNS) {
        return 1 if ( $text =~ $pattern );
    }
    return 0;
}    ## end sub _andu_is_transient_error

# Shared exception handler of the progress monitors. A transient error is
# logged and the command interface refreshed after a 30 second pause so that
# the caller can poll again; any other exception is logged and rethrown.
sub _andu_recover_from_poll_error {
    my ( $ex, $command_interface ) = @_;

    if ( _andu_is_transient_error( $ex->text() ) ) {

        # Connection failed, wait and retry
        $Log->comment( $ex->text );
        Tharn::snooze(30);
        $Log->comment('Refreshing Command Interface');
        $command_interface->refresh_command_interface();
    }
    else {
        $Log->comment( 'NATE::Base Exception Caught: ' . $ex->text() );
        $ex->throw();
    }
    Tharn::snooze(10);
    return;
}    ## end sub _andu_recover_from_poll_error

# Waits, phase by phase, for a double hop update to complete.
sub _wait_for_completion_double_hop {
    $Log->enter() if $may_enter;
    my $pkg  = shift;
    my %opts = @_;

    foreach my $phase (qw(validation ontap-updates post-update-checks)) {
        $pkg->_monitor_phase_status_double_hop( %opts,
            update_phase => $phase );
    }
    $Log->exit() if $may_exit;
}    ## end sub _wait_for_completion_double_hop

# Polls the status of $opts{update_phase} once a minute until the phase is
# completed. Throws NATE::BaseException when the phase fails, pauses on error
# or reports an unrecognised status, and NACL::Exceptions::Timeout when the
# scaled 'method-timeout' expires first.
sub _monitor_phase_status_double_hop {
    $Log->enter() if $may_enter;
    my $pkg            = shift;
    my %opts           = @_;
    my $scaled_timeout = $opts{command_interface}
      ->scale_timeout( 'method-timeout' => $opts{'method-timeout'} );
    my $end_time = timeout2time($scaled_timeout);

    while (1) {
        my $phase_result;
        try {
            $phase_result = _get_phase_status(
                command_interface => $opts{command_interface},
                ndu_phase         => $opts{'update_phase'},
            );
        }
        catch NATE::BaseException with {
            my $ex = shift;
            _andu_recover_from_poll_error( $ex, $opts{command_interface} );
        };

        # Checked on every iteration so that a cluster which never answers
        # cannot keep this loop alive forever.
        if ( time() > $end_time ) {
            NACL::Exceptions::Timeout->throw( 'After '
                  . "waiting for $scaled_timeout seconds, "
                  . "the update has not completed.\n" );
        }

        # The poll failed and the connection was refreshed: poll again
        # instead of judging the update without a status.
        next if ( !defined $phase_result );

        my $phase_status = $phase_result->{"$opts{'update_phase'}"};
        $phase_status = '' if ( !defined $phase_status );

        if ( $phase_status =~ /completed/ ) {
            $Log->comment("$opts{'update_phase'} completed successfully");
            last;
        }
        elsif ( $phase_status =~ /in-progress/ ) {
            $Log->comment("$opts{'update_phase'} is in-progress");
        }
        elsif ( $phase_status =~ /failed|paused-on-error/ ) {
            NATE::BaseException->throw( 'ANDU is failed: ' . $phase_status );
        }
        else {
            NATE::BaseException->throw(
                'ANDU is failed: ' . Dumper $phase_result);
        }
        Tharn::snooze(60);
    }
    $Log->exit() if $may_exit;
}    ## end sub _monitor_phase_status_double_hop

# Runs "cluster image show-update-progress" for one phase through the raw
# command interface.
#
# Options: command_interface (required), ndu_phase (default 'validation'),
# fields (default 'phase-status').
#
# Returns a hash ref built from the "<key>X;X<value>X;X" output rows when
# fields is 'phase-status', otherwise the first non-blank output line.
sub _get_phase_status {
    my %opts      = @_;
    my $Cserver   = $opts{command_interface};
    my $ndu_phase = defined $opts{ndu_phase} ? $opts{ndu_phase} : 'validation';
    my $fields    = defined $opts{fields} ? $opts{fields} : 'phase-status';

    my $result = $Cserver->apiset()->execute_raw_command( command =>
"cluster image show-update-progress -ndu-phase $ndu_phase -fields $fields"
    );

    # Keep only the lines that carry text.
    my @output_lines = grep { /\w+/ } split( /\n/, $result );

    my $phase_string = shift @output_lines;
    $Log->comment( 'update phase string is: '
          . ( defined $phase_string ? $phase_string : '' ) );

    return $phase_string if ( $fields ne 'phase-status' );

    # The next two lines are not data rows.
    splice @output_lines, 0, 2;
    my %phase_output;
    foreach my $row (@output_lines) {

        # Rows that do not have the expected shape are ignored rather than
        # stored under the captures of an earlier match.
        if ( $row =~ /(\S+)X;X(\S+)X;X/ ) {
            $phase_output{$1} = $2;
        }
    }
    return {%phase_output};
}    ## end sub _get_phase_status

# Polls the raw "cluster image show-update-progress" output of a 2-node MCC
# once a minute until two entries are reported as completed. Throws
# NACL::Exceptions::Timeout when the scaled 'method-timeout' expires first.
#
# NOTE(review): 'failed|paused-on-error' is only looked at when no entry is
# counted as completed, or when the output is in-progress without 'waiting';
# a failure reported next to one completed entry runs into the timeout.
# Confirm whether that is intended.
sub _monitor_phase_status_2node_mcc {
    $Log->enter() if $may_enter;
    my $pkg            = shift;
    my %opts           = @_;
    my $scaled_timeout = $opts{command_interface}
      ->scale_timeout( 'method-timeout' => $opts{'method-timeout'} );
    my $end_time = timeout2time($scaled_timeout);

    while (1) {
        my $sup_obj;
        try {
            my $apiset  = $opts{command_interface}->apiset();
            my $command = 'cluster image show-update-progress';
            $sup_obj = $apiset->execute_raw_command( command => $command );
        }
        catch NATE::BaseException with {
            my $ex = shift;
            _andu_recover_from_poll_error( $ex, $opts{command_interface} );
        };

        # Checked on every iteration, also when the poll itself failed.
        if ( time() > $end_time ) {
            NACL::Exceptions::Timeout->throw( 'After '
                  . "waiting for $scaled_timeout seconds, "
                  . "the update has not completed.\n" );
        }

        # Nothing to evaluate after a failed poll; try again.
        next if ( !defined $sup_obj );

        if ( $sup_obj =~ /completed/ ) {
            my $count = () = $sup_obj =~ /XcompletedX/ig;
            if ( $count == 2 ) {
                $Log->comment('Update completed successfully');
                last;
            }
            elsif ( $count == 1 ) {
                if ( $sup_obj =~ /in-progress/ ) {
                    $Log->comment(
                        'One node got updated the other is in-progress');
                }
            }
            elsif ( $sup_obj =~ /failed|paused-on-error/ ) {
                NATE::BaseException->throw( 'ANDU: ' . Dumper $sup_obj);
            }
        }
        elsif ( $sup_obj =~ /in-progress/ ) {
            if ( $sup_obj =~ /waiting/ ) {
                $Log->comment('One node is waiting the other is in-progress');
            }
            elsif ( $sup_obj =~ /failed|paused-on-error/ ) {
                NATE::BaseException->throw( 'ANDU: ' . Dumper $sup_obj);
            }
        }
        Tharn::snooze(60);
    }
    $Log->exit() if $may_exit;
}    ## end sub _monitor_phase_status_2node_mcc

# Polls show_update_progress for $opts{update_phase} once a minute until the
# phase is completed. Throws NATE::BaseException when the phase fails or
# pauses on error, and NACL::Exceptions::Timeout when the scaled
# 'method-timeout' expires first.
sub _monitor_phase_status {
    $Log->enter() if $may_enter;
    my $pkg            = shift;
    my %opts           = @_;
    my $scaled_timeout = $opts{command_interface}
      ->scale_timeout( 'method-timeout' => $opts{'method-timeout'} );
    my $end_time = timeout2time($scaled_timeout);

    # Most recent successful poll; only used to enrich the timeout message.
    my $last_progress;

    while (1) {
        my $sup_obj;
        try {
            $sup_obj = $pkg->show_update_progress(
                command_interface => $opts{command_interface},
                filter            => { 'ndu-phase' => $opts{'update_phase'} },
                requested_fields  => ['phase-status'],
                'method-timeout'  => 120,
            );
        }
        catch NATE::BaseException with {
            my $ex = shift;
            _andu_recover_from_poll_error( $ex, $opts{command_interface} );
        };
        $last_progress = $sup_obj if ( defined $sup_obj );

        # Checked on every iteration, also when the poll itself failed.
        if ( time() > $end_time ) {
            my $phase_name =
              defined $last_progress
              ? $last_progress->ndu_phase()
              : $opts{'update_phase'};
            my $phase_state =
              defined $last_progress
              ? $last_progress->phase_status()
              : 'unknown';
            NACL::Exceptions::Timeout->throw( 'After '
                  . "waiting for $scaled_timeout seconds, "
                  . "the update has not completed.\n"
                  . 'Update Phase: '
                  . $phase_name
                  . "\nPhase Status:"
                  . $phase_state );
        }

        # Nothing to evaluate after a failed poll; try again.
        next if ( !defined $sup_obj );

        if ( $sup_obj->phase_status =~ /completed/ ) {
            $Log->comment(
                $sup_obj->ndu_phase() . ' phase completed successfully' );
            last;
        }
        if ( $sup_obj->phase_status =~ /failed|paused-on-error/ ) {
            NATE::BaseException->throw( 'ANDU: '
                  . $sup_obj->ndu_phase()
                  . ' Phase reached '
                  . $sup_obj->phase_status()
                  . ' state.' );
        }
        Tharn::snooze(60);
    }
    $Log->exit() if $may_exit;
}    ## end sub _monitor_phase_status

# Returns a copy of the argument without leading and trailing whitespace.
# An undefined argument yields the empty string.
sub _trim {
    my $string = shift;

    return '' if ( !defined $string );
    $string =~ s/^\s+//;
    $string =~ s/\s+$//;
    return $string;
}    ## end sub _trim

# Builds "<ontap_dir>/<arch>[.<flag>...]/tarball/image.tgz" from a hash ref
# with the keys ontap_dir, arch and build_flags (array ref).
# debug, optimize, domain, gcov and sim are appended to the architecture
# directory in the order given; non-debug is accepted and adds nothing.
# Throws NATE::BaseException for anything else.
sub _guess_image_path {
    $Log->enter() if $may_enter;
    my $href       = shift;
    my $image_path = $href->{'ontap_dir'};
    $image_path .= "/$href->{'arch'}";

    foreach my $build_flag ( @{ $href->{'build_flags'} } ) {

        # Consume a copy so that the caller's list is left untouched.
        my $a_flag = $build_flag;
        while ( $a_flag !~ /^\s*$/ ) {
            if (   $a_flag =~ /(^debug|optimize)/
                || $a_flag =~ /(domain)/
                || $a_flag =~ /(gcov)/
                || $a_flag =~ /(sim)/ )
            {
                my $known_flag = $1;
                $image_path .= ".$known_flag";
                $a_flag =~ s/\Q$known_flag\E//;
            }
            elsif ( $a_flag =~ /(non-debug)/ ) {
                $a_flag =~ s/non-debug//;
            }
            else {
                $Log->exit() if $may_exit;
                NATE::BaseException->throw(
"'build_flags' has a invalid value $a_flag , should be either of debug,non-debug,domain,gcov,sim or combination of these"
                );
            }
        }
    }
    $image_path .= "/tarball/image.tgz";
    $Log->comment( 'The Image Path is: ' . $image_path );
    $Log->exit() if $may_exit;
    return $image_path;
}    ## end sub _guess_image_path

=head2 wait_for_completion

Waits for the validation, ontap-updates and post-update-checks phases of a
running update to complete, in that order. Takes the same options as
C<update> (C<command_interface> and C<'method-timeout'> are used).

=cut

sub wait_for_completion {
    $Log->enter() if $may_enter;
    my $pkg  = shift;
    my %opts = @_;

    foreach my $phase (qw(validation ontap-updates post-update-checks)) {
        $pkg->_monitor_phase_status( %opts, update_phase => $phase );
    }
    $Log->exit() if $may_exit;
}    ## end sub wait_for_completion

=head2 wait_for_task_status

This method will wait for the ANDU update to reach a given phase viz.
do-download-job, do-failover-job, do-giveback-job, do-postupdate-tasks etc.

    my $status = $Andu_Obj->wait_for_task_status(
        command_interface => $Cserver,
        'status'          => 'started',
        'trans_name'      => 'do-download-job',
        'component_id'    => 'MUM',
    );

The update log is polled every 10 seconds for up to 1800 seconds; a
C<NACL::Exceptions::Timeout> is thrown when no matching entry shows up in
that time.

=over

=item Options

=item C<< command_interface => $ci >>

(Required) The command interface on which the update log is queried.

=item C<< status => $status >>

(Required) the required state that the andu update phase we are waiting for

=item C<< trans_name => $transaction_name >>

(Required) The transaction name for which the update needs to wait for and
return

=item C<< component_id => $component >>

(Required) The name of the node where the trans-name is required to reach the
given state

=back

=cut

sub wait_for_task_status {
    $Log->enter() if $may_enter;
    my $pkg       = shift;
    my %task_info = @_;
    my $status;
    my $scaled_timeout = 1800;
    my $end_time       = timeout2time($scaled_timeout);

    while (1) {
        try {
            my $node_name;
            if ( !exists $task_info{component_id} ) {
                $Log->debug('No node name provided');
                $node_name = '*';
            }
            elsif ( defined $task_info{component_id}
                && $task_info{component_id} =~ /MUM/i )
            {

                # post-update-checks runs on the MUM node
                if ( $task_info{trans_name} =~ /do-postupdate-checks-task/ ) {
                    $node_name = 'MUM';
                }
                else {
                    my $cserver_apiset =
                      $task_info{command_interface}->apiset();
                    my $resp =
                      $cserver_apiset->cluster_image_show_upgrade_manager();
                    my $output = $resp->get_parsed_output();
                    $node_name = $$output[0]->{'mum-node'};
                    $Log->debug( 'Getting MUM Node: ' . $node_name );
                }
            }
            else {
                $node_name = $task_info{component_id};
            }

            $status = $pkg->SUPER::show_update_log(
                command_interface => $task_info{command_interface},
                filter            => {
                    status         => $task_info{status},
                    'trans-name'   => $task_info{trans_name},
                    'component-id' => $node_name,
                },
                requested_fields => [qw(status component-id)],
                'method-timeout' => 1200,
            );
        }
        catch NACL::Exceptions::NoElementsFound with {
            $Log->comment('No such tasks found');
        }
        catch NATE::BaseException with {
            my $ex = shift;

            # Connection failed, wait and retry
            if ( $ex->text() =~ /can not write/i ) {
                $Log->comment( $ex->text );
                Tharn::snooze(10);
                $Log->debug('Refreshing Command Interface');
                $task_info{command_interface}->refresh_command_interface();
            }
            elsif ( $ex->text() =~ /Could not create connection for/i ) {
                $Log->comment( $ex->text );
                Tharn::snooze(10);
                $Log->debug('Refreshing Command Interface');
                $task_info{command_interface}->refresh_command_interface();
            }
            elsif ( $ex->text() =~ /No matching cluster image/i ) {
                $Log->comment( $ex->text );
                Tharn::snooze(1);
            }
            else {
                $Log->comment(
                    'NATE::Base Exception Caught: ' . $ex->text() );
                $ex->throw();
            }
        };

        # Leave the loop from here rather than from inside the try closure.
        last if ($status);

        if ( time() > $end_time ) {
            NACL::Exceptions::Timeout->throw( 'After '
                  . "waiting for $scaled_timeout seconds, "
                  . "the update has not reached the.\n"
                  . 'Update Phase: '
                  . $task_info{trans_name} . "\n"
                  . 'Update Status: '
                  . $task_info{status}
                  . "\n" );
        }
        Tharn::snooze(10);
    }
    $Log->exit() if $may_exit;
    return $status;
}    ## end sub wait_for_task_status

# Runs, at advanced privilege, every quoted "...-downgrade..." command found
# in $opts{comments} (the text of a failed update), each of them once.
sub _disable_capabilities {
    my $pkg  = shift;
    my %opts = @_;
    my %commands;

    $Log->comment("Disable obsolete capabilities before downgrading");
    my $cserver_apiset = $opts{command_interface}->apiset();

    # Fold the message onto one line, then look at each ':'-separated part.
    my $colon_sep_update_comments =
      join( " ", split( /\n/, $opts{comments} ) );
    foreach my $part ( split( /:/, $colon_sep_update_comments ) ) {
        if ( $part =~ /"(.*-downgrade.*?)"/ ) {
            $commands{$1} = '';
        }
    }

    foreach my $command ( keys(%commands) ) {
        $Log->comment("Disabling the capability : $command");
        $cserver_apiset->execute_command(
            'privilege-level' => 'advanced',
            'command'         => $command,
        );
        Tharn::snooze(3);
    }
}    ## end sub _disable_capabilities

# This sub will delete the root_volume snapshots.
# Takes an array ref of node objects; on each node the nightly.N / hourly.N
# snapshots listed for 'vol0' are deleted.
sub _delete_root_vol_snapshots {
    $Log->enter() if $may_enter;
    my @Nodes = @{ shift @_ };

    # Delete the root volume snapshots to free the space.
    $Log->step('Delete the Root Volume Snapshots');
    foreach my $node_var (@Nodes) {
        my $node    = $node_var->node;
        my $api_obj = $node_var->apiset();

        # Get the list of snapshots available on volume 'vol0'
        # on both the nodes.
        my $common_str = "run -node $node -command \"priv set test; snap";
        my $res        = $api_obj->execute_command(
            command => $common_str . " list -b vol0\"" );
        next if ( $res =~ /No\ssnapshots\sexist/ );

        my @snap_arr =
          map ( /^(nightly.\d|hourly.\d)/, split( /\n/, $res ) );

        # If the snapshots are available for 'vol0',
        # delete them
        foreach my $snap (@snap_arr) {
            $api_obj->execute_command(
                command => $common_str . " delete -V vol0 $snap\"" );
        }
    }
    $Log->exit() if $may_exit;
}    ## end sub _delete_root_vol_snapshots

# Sets the AUTOBOOT bootarg to true on all nodes ("run *") through the
# apiset of the first node of the given array ref.
sub _set_AUTOBOOT_bootarg {
    $Log->enter() if $may_enter;
    my @nodeobj = @{ shift @_ };

    # Get the Node apiset
    my $ApiSet_Obj = $nodeobj[0]->apiset();

    # Set the AUTOBOOT args.
    $Log->step('Set \'bootargs set AUTOBOOT true\'');
    $ApiSet_Obj->execute_command(
        command => "set test;run * bootargs set AUTOBOOT true" );
    $Log->exit() if $may_exit;
}    ## end sub _set_AUTOBOOT_bootarg

# Enables cluster HA (tolerating the error raised on clusters that do not
# have exactly two eligible nodes), reverts every cluster, data and node-mgmt
# LIF that is not at home, and logs the resulting LIF state.
# Takes an array ref of node objects; the first one is used as the command
# interface.
sub _revert_lifs_to_home_node {
    $Log->enter() if $may_enter;
    my @nodeobj = @{ shift @_ };

    $Log->step('Enable cluster HA on the setup');
    try {
        NACL::C::ClusterHa->modify(
            command_interface => $nodeobj[0],
            configured        => "true"
        );
    }
    catch NATE::BaseException with {
        my $ex = shift;
        if ( $ex->text() =~
/command failed\: Cluster high\-availability can only be enabled on a cluster with exactly two eligible nodes./
          )
        {

            # cluster HA cannot be enabled on this cluster size
            $Log->comment('Cluster is having more than 2 nodes');
        }
        else {
            $Log->comment( 'Exception Caught: ' . $ex->text() );
            $ex->throw();
        }
    };

    $Log->step('Revert lifs to home node, if necessary');
    my @lifs = NACL::CS::NetworkInterface->fetch(
        command_interface => $nodeobj[0],
        filter            => { role => "cluster,data,node-mgmt", },
        requested_fields  => [qw (is-home)],
    );
    foreach my $lif (@lifs) {
        next if ( $lif->is_home() !~ /false/ );
        $Log->comment('Reverting Lif to home node');
        NACL::STask::NetworkInterface->revert(
            command_interface => $nodeobj[0],
            lif               => $lif->lif(),
            vserver           => $lif->vserver(),
        );
    }

    # Displaying the LIF status
    @lifs = NACL::CS::NetworkInterface->fetch(
        command_interface => $nodeobj[0],
        filter            => { role => "cluster,data,node-mgmt", },
        requested_fields  => [qw (is-home)],
    );
    foreach my $lif (@lifs) {
        $Log->comment(
            'LIF: ' . $lif->lif() . ' IS HOME ' . $lif->is_home() );
    }
    $Log->exit() if $may_exit;
}    ## end sub _revert_lifs_to_home_node

1;