#
# Copyright (c) 2010-2017 NetApp, Inc., All Rights Reserved
# Any use, modification, or distribution is prohibited
# without prior written consent from NetApp, Inc.
#

## @summary NACL Transit library
## @author anerudh@netapp.com, dl-nacl-dev@netapp.com
## @status shared
## @pod here

package NACL::Transit;

use strict;
use warnings;

use base qw /NACL::Transit::RetrieveState/;
use base qw /NACL::Transit::ChangeState/;
use base qw /NACL::Transit::StateDefinitions/;

use Params::Validate qw/:all/;
use NACL::ComponentUtils qw(_hash_copy);
use Tharn;
use NATE::Log qw(log_global);
my $Log       = log_global();
my $may_enter = $Log->may_enter();
my $may_exit  = $Log->may_exit();
use NATE::Time qw(timeout2time);

# Exceptions
use NATE::Exceptions::Argument qw(:try);
use NACL::Transit::Exceptions::RetriesExceeded qw(:try);

# Options understood by get_state(). new() also stores their defaults in
# the object.
my %spec_get_state = (
    timeout                   => { type => SCALAR, default => "120" },
    maxidle                   => { type => SCALAR, default => 120 },
    append_output_to          => { type => SCALARREF | UNDEF, default => undef },
    transient                 => { type => SCALAR, default => 0 },
    probe_timeout             => { type => ARRAYREF, default => [5,120] },
    get_state_timeout         => { type => SCALAR, default => 120 },
    fake_prompt_retry_timeout => { type => SCALAR, default => 5 },
    user_defined_states       => { type => ARRAYREF, optional => 1 },
);

# Options understood by change_state(). new() also stores their defaults in
# the object.
my %spec_change_state = (
    timeout          => { type => SCALAR, default => "120" },
    maxidle          => { type => SCALAR, default => 120 },
    append_output_to => { type => SCALARREF | UNDEF, default => undef },
    # the above parameters are common to get_state and change_state
    change_state_timeout  => { type => SCALAR, default => 600 },
    maxcycle              => { type => SCALAR, default => -1 },
    powercycle            => { type => SCALAR, default => 0 },
    iftakenover           => { type => SCALAR, default => "error" },
    ifpanic               => { type => SCALAR, default => "pass" },
    restore_system_config => { type => SCALAR, default => 0 },
    # for the following fields, defaults depend on hosttype
    # so the default values are filled in inside new()
    username => { type => SCALAR, default => "" },
    #For user 'admin' specify 'password' if value is other than the one specified in tharnhost, while creating Transit object
    password => { type => SCALAR, default => "" },
    #For non-admin users specify 'prompt_password' if value is other than the one specified in tharnhost, while creating Transit object
    prompt_password => { type => SCALAR, default => "" },
    # for haltcmd, the value to use depends on hosttype and current state
    # Hence, the actual value is computed at run time. The default mentioned
    # here may not be the value being used.
    haltcmd                   => { type => SCALAR, default => "halt -f" },
    inhibit_takeover          => { type => SCALAR, default => 1 },
    fake_prompt_retry_timeout => { type => SCALAR, default => 5 },
    user_defined_states       => { type => ARRAYREF, optional => 1 },
);

###############################################################################
# Method: new
# Objective: Constructor to create new Transit object
# Details: See POD documentation at the end of this file.
###############################################################################
# Arguments (named):
#   name   - hostname, Hostrec, console Connectrec or
#            NACL::APISet::Node::CLI::SP object (required)
#   use_sp - if true and 'name' is a non-simulator Hostrec, connect through
#            the service processor instead of the console (default 0)
# Returns: the blessed Transit object.
# Throws:  NATE::Exceptions::Argument when 'name' is missing or unsupported.
sub new {
    $Log->enter() if $may_enter;
    my ( $class, %args ) = (@_);

    # $args{name} can be a hostname, Hostrec object or a Connectrec object
    my $name = $args{name};

    # should we prefer the service processor as our console mechanism?
    my $use_sp = $args{use_sp} // 0;

    my $conn;
    my $sp_apiset;

    if ( not defined($name) ) {
        $Log->exit() if $may_exit;
        NATE::Exceptions::Argument->throw(
            "The parameter 'name' is required and should be defined");
    }

    # NACL::Transit->new() supports various object types
    if ( ref($name) ) {
        require Scalar::Util;

        if ( !Scalar::Util::blessed($name) ) {
            # A plain (unblessed) reference cannot answer ->isa(); reject it
            # with the same exception used for unsupported objects instead
            # of dying with "Can't call method on unblessed reference".
            $Log->exit() if $may_exit;
            NATE::Exceptions::Argument->throw(
                "Object '$name' is not supported by NACL::Transit->new");
        }
        elsif ( $name->isa('Hostrec') ) {
            if ( !$name->is_simulator() && $use_sp ) {
                # hardware ontap: use the service processor
                # NOTE(review): NACL::APISet is not loaded by this file;
                # presumably a caller or base class loads it - confirm.
                $sp_apiset = NACL::APISet->new(
                    hostobj          => $name,
                    category         => "Node",
                    interface        => "CLI",
                    set              => "SP",
                    connid           => "sp_login",
                    conntype         => "libssh-persist",
                    login_to_console => 1
                );
                $conn = $sp_apiset->get_connection();
            }
            else {
                # ontap simulator (vsim), or hardware without use_sp:
                # use the regular old console connection
                $name = $name->id();
            }
        }
        elsif ( $name->isa('Connectrec') ) {
            if (   $name->conntype() =~ /(serial|pm_cons|rconsole)/
                || $name->connid() =~ /(sp_login|bmc_login)/ )
            {
                # Connectrec has already been created
                # we will try to use the same Connectrec
                $conn = $name;
            }
            else {
                $Log->exit() if $may_exit;
                NATE::Exceptions::Argument->throw(
                    "The provided Connectrec is not a console connection");
            }
        }
        elsif ( $name->isa('NACL::APISet::Node::CLI::SP') ) {
            # if it is an SP APISet object then extract the connection
            # object from the APISet
            $conn      = $name->get_connection();
            $sp_apiset = $name;
        }
        else {
            $Log->exit() if $may_exit;
            NATE::Exceptions::Argument->throw(
                "Object '$name' is not supported by NACL::Transit->new");
        }
    }
    else {
        # a hostname was passed in
        # this is intentionally left as a noop
    }

    # establish the connection and store it in the object ( if either the
    # hostrec or hostname was passed to the constructor )
    $conn = Tharn::connect("$name#console") unless $conn;

    # Build a per-object copy of the combined spec whose credential defaults
    # come from this connection. The file-level spec hashes are left
    # untouched so that one object's credentials never leak into the shared
    # defaults.
    my %spec = ( %spec_change_state, %spec_get_state );
    for my $field (qw(username password prompt_password)) {
        $spec{$field} = { %{ $spec{$field} }, default => $conn->{$field} };
    }

    # validate_with() on an empty parameter list returns every default
    my $ref = {
        conn       => $conn,
        _sp_apiset => $sp_apiset,
        validate_with( params => [], spec => \%spec ),
    };
    bless $ref, $class;

    $ref->{timeout_scaling_factor} = $ref->_get_timeout_scaling_factor();

    $Log->exit() if $may_exit;
    return $ref;
}

##############################################################################
# Method: set
# Objective: To set values of options that will be used in the method calls to
# get_state or change_state
# Details: See POD documentation at the end of this file.
##############################################################################
# Arguments: (option => value) pairs. Options named in the get_state /
#            change_state specs are type-checked; other keys (for example
#            'conn') are stored without validation.
# Returns:   nothing.
# Throws:    NATE::Exceptions::Argument if validation fails; in that case no
#            field is modified.
sub set {
    my ( $self, %args ) = @_;
    $Log->enter() if $may_enter;

    my $spec = { %spec_change_state, %spec_get_state };

    # Do validation. This is an atomic operation. If the validation should fail
    # for any reason, no field will be set and the exception will be thrown.
    # The parameters are captured here because inside the try block @_ is the
    # block's own (empty) argument list, not the arguments of set().
    my @params = %args;
    try {
        validate_with(
            params      => \@params,
            spec        => $spec,
            allow_extra => 1,
        );
    }
    otherwise {
        # The handler receives the caught exception as its first argument.
        my ($error) = @_;
        $error //= $@;
        $error //= 'Parameter validation failed';
        $Log->exit() if $may_exit;
        NATE::Exceptions::Argument->throw("$error");
    };

    # the default change_state_timeout value is 600
    # the default get_state_timeout value is 120
    # If the user is changing the 'timeout' value in the object,
    # both change_state_timeout and get_state_timeout are set to the specified
    # value
    if ( exists $args{timeout} ) {
        $args{change_state_timeout} = $args{get_state_timeout} =
          $args{timeout};
    }

    # The user provided halt command is appended with a space to distinguish
    # it from the library default value
    if ( exists $args{haltcmd} ) {
        $args{haltcmd} .= ' ';
    }

    # set the arguments
    foreach my $key ( keys %args ) {
        $self->{$key} = $args{$key};
    }

    $self->{timeout} =
      $self->{get_state_timeout} . ',' . $self->{change_state_timeout};

    $Log->exit() if $may_exit;
    return;
}

##############################################################################
# Method: get
# Objective: To get values of options that will be used in the method calls to
# get_state or change_state
# Details: See POD documentation at the end of this file.
##############################################################################
# Arguments: optional list of option names.
# Returns:   a hash (list of key/value pairs) of the requested options and
#            their current values; with no arguments, every option named in
#            the get_state / change_state specs plus 'conn'.
# Throws:    NATE::Exceptions::Argument if any requested name is not a key
#            of the object.
sub get {
    my ( $self, @requested ) = @_;
    $Log->enter() if $may_enter;

    my %values;
    my @unknown;

    # No names given: report everything, without validation
    if ( !@requested ) {
        for my $option ( keys %spec_change_state, keys %spec_get_state, 'conn' )
        {
            $values{$option} = $self->{$option};
        }
    }

    # Names given: collect the known ones, remember the unknown ones
    for my $option (@requested) {
        if ( !exists $self->{$option} ) {
            push( @unknown, $option );
            next;
        }
        $values{$option} = $self->{$option};
    }

    $Log->exit() if $may_exit;

    NATE::Exceptions::Argument->throw("Invalid options: @unknown specified")
      if @unknown;

    return %values;
}

##############################################################################
# Method: get_state
# Objective: To get the appliance's current state.
# Details: See POD documentation at the end of this file.
##############################################################################
# Arguments: optional (option => value) overrides, validated against the
#            get_state spec.
# Returns:   the first value returned by boot_state_delay() (the state).
sub get_state {
    my ( $self, @overrides ) = @_;
    $Log->enter() if $may_enter;

    my %validated;
    if (@overrides) {
        # Data::Dumper is only needed when overrides are logged
        require Data::Dumper;
        %validated = validate_with(
            spec   => \%spec_get_state,
            params => \@overrides,
        );
        $Log->debug( "Overrides being passed are: "
              . Data::Dumper::Dumper( \@overrides ) );
    }

    # boot_state_delay() returns a list; only its first element is used here
    my ($current_state) = $self->boot_state_delay(%validated);

    $Log->exit() if $may_exit;
    return $current_state;
}

##############################################################################
# Method: wait_for_state
# Objective: Wait for appliance to reach a certain state.
# Details: See POD documentation at the end of this file.
##############################################################################
# Arguments (named):
#   wait_for               - name of the state to wait for (required)
#   poll_interval          - seconds to snooze between polls (default 10)
#   wait_for_state_timeout - overall timeout (default 300)
#   any other option is passed through to get_state()
# Returns: the state reached (equal to 'wait_for').
# Throws:  NACL::Transit::Exceptions::Timeout if the state is not reached
#          before the timeout expires.
sub wait_for_state {
    my ( $self, @args ) = @_;
    $Log->enter() if $may_enter;

    my %opts = validate_with(
        spec => {
            wait_for               => { type => SCALAR },
            poll_interval          => { type => SCALAR, default => 10 },
            wait_for_state_timeout => { type => SCALAR, default => 300 },
        },
        params      => \@args,
        allow_extra => 1,
    );

    # Strip the options that belong to this method; what remains in %opts
    # is forwarded to get_state()
    my $user_state    = delete $opts{wait_for};
    my $poll_interval = delete $opts{poll_interval};
    my $get_state_timeout = delete $opts{get_state_timeout} || $self->{get_state_timeout};
    my $timeout = delete $opts{wait_for_state_timeout};

    # From here on $timeout is compared against time() (see
    # _time_left_to_timeout)
    $timeout = timeout2time($timeout);

    my $state   = "";
    my $console = "";

    # Unless the caller said otherwise, ask the state definition whether the
    # awaited state is transient; fall back to 0 if that lookup fails
    my $is_transient = $opts{transient};
    if ( !defined $is_transient ) {
        try {
            $is_transient = "NACL::Transit::StateDefinitions::$user_state"->is_Transient();
        }
        otherwise {
            $is_transient = 0;
        };
    }

    # Console output goes to the caller's scalar reference if one was given,
    # otherwise to a local scratch buffer
    $console = $opts{'append_output_to'} if defined( $opts{'append_output_to'} );
    my $console_output = ref $console ? $console : \$console;

    while ( $self->_time_left_to_timeout($timeout) > 0 ) {
        try {
            # Never let a single get_state() call outlive the overall timeout
            my $time_left = $self->_time_left_to_timeout($timeout);
            my $new_get_state_timeout =
              ( $time_left > $get_state_timeout )
              ? $get_state_timeout
              : $time_left;
            $state = $self->get_state(
                %opts,
                get_state_timeout => $new_get_state_timeout,
                append_output_to  => $console_output,
                transient         => $is_transient
            );
        }
        catch NACL::Transit::Exceptions::TransitException with {
            # Dont do anything
        };

        if ( $state eq $user_state
            || ( $self->_time_left_to_timeout($timeout) ) <= 0 )
        {
            last;
        }
        else {
            Tharn::snooze($poll_interval);
        }
    }

    if ( $state ne $user_state ) {
        # Log the exit before throwing, as every other throw in this file
        # does, so enter/exit logging stays balanced
        $Log->exit() if $may_exit;
        NACL::Transit::Exceptions::Timeout->throw("Timed out waiting for filer to reach state $user_state");
    }

    $Log->exit() if $may_exit;
    return $state;
}

# Returns the difference between the given timeout value and the current
# time(); zero or negative means the timeout has passed.
sub _time_left_to_timeout {
    my ( $self, $timeout ) = @_;
    return ( $timeout - time() );
}

##############################################################################
# Method: change_state
# Objective: To change the appliance's state to the state specified.
# Details: See POD documentation at the end of this file.
##############################################################################
# Arguments (named):
#   to   - destination state (required)
#   from - accepted for backward compatibility; removed and not used here
#   any other option is validated against the change_state spec and passed
#   on to transit_to_nopowercycle()
# Returns: whatever transit_to_nopowercycle() returns (the state reached).
# Throws:  NATE::Exceptions::Argument if 'to' is missing.
sub change_state {
    my $self = shift;
    $Log->enter() if $may_enter;

    my %args = @_;

    # 'from' and 'to' are not part of the validated spec, so remove them
    # before validation
    delete $args{'from'};
    my $to = delete $args{'to'};

    # 'to' is mandatory: fail early with a clear error instead of comparing
    # and forwarding an undefined destination state
    if ( !defined $to ) {
        $Log->exit() if $may_exit;
        NATE::Exceptions::Argument->throw(
            "The parameter 'to' is required and should be defined");
    }

    if (%args) {
        my @overrides = %args;
        validate_with(
            spec   => \%spec_change_state,
            params => \@overrides,
        );
        $Log->debug("Overrides being passed are: @overrides");
    }

    # This change is for backward compatibility. ZERO_DISKS state is
    # changed to WIPE_CONFIG because of the messages that we get
    # on filer console. Now system reboots before zero-ing the disks
    # system reboot scenario is also handled by WIPE_CONFIG state
    if ( $to eq "ZERO_DISKS" ) {
        $to = "WIPE_CONFIG";
    }

    # If the destination state can only be reached by one node
    # taking over the other node then inhibit_takeover
    # should be set to false
    if ( $to eq "TAKEN_OVER_WAITING_ONTAP" || $to eq "TAKEN_OVER_QUERY" ) {
        $args{inhibit_takeover} = 0;
    }

    # NOTE(review): only a per-call 'powercycle' override is consulted here;
    # $self->{powercycle} is not read in this method - confirm intended.
    if ( !$args{powercycle} ) {
        $Log->exit() if $may_exit;
        return $self->transit_to_nopowercycle( to => $to, %args );
    }

    # powercycle requested: if the first attempt runs out of retries, cycle
    # power on the host and try exactly once more
    my $state;
    try {
        $state = $self->transit_to_nopowercycle( to => $to, %args );
    }
    catch NACL::Transit::Exceptions::RetriesExceeded with {
        $self->{conn}->hostp()->powercycle();
        $state = $self->transit_to_nopowercycle( to => $to, %args );
    };

    $Log->exit() if $may_exit;
    return $state;
}

# ##############################################################################
# # Method: reboot
# # Objective: To bring the appliance to the CLI state - if already in an ONTAP
# # state, transit out of ONTAP and then transit to CLI
# # Details: See POD documentation at the end of this file.
# ##############################################################################
# Arguments: (option => value) pairs forwarded to change_state(); 'to' is
#            not allowed.
# Returns:   the result of the final change_state( to => "CLI" ).
# Throws:    NATE::Exceptions::Argument if 'to' is supplied.
sub reboot {
    my ( $self, %args ) = @_;
    $Log->enter() if $may_enter;

    # The destination is fixed, so the caller may not choose one
    if ( defined $args{to} ) {
        $Log->exit() if $may_exit;
        NATE::Exceptions::Argument->throw("Invalid options: to(state) specified");
    }

    # Only these options are forwarded to the initial get_state() probe
    my %probe_opts;
    $self->_hash_copy(
        source => \%args,
        target => \%probe_opts,
        copy   => [ 'timeout', 'maxidle', 'append_output_to' ],
    );
    my $current = $self->get_state( %probe_opts, transient => 1 );

    # Fix for 690243. If inhibit_takeover is set to true and
    # current state is not CLI then there is no way to pass
    # -f option to 'halt' command to avoid takeover.
    # So changing state to CLI before rebooting the Filer
    if ( $self->{inhibit_takeover} && $current ne "CLI" ) {
        $self->change_state( to => "CLI", %args );
    }

    # An ONTAP state (as observed by the probe above) is first taken down
    # to FIRMWARE
    if ( "NACL::Transit::StateDefinitions::$current"->is_ONTAP() ) {
        $self->change_state( to => "FIRMWARE", %args );
    }

    $Log->exit() if $may_exit;
    return $self->change_state( to => "CLI", %args );
}

# Returns the connection object stored in the Transit object.
sub get_connection {
    my ($self) = @_;
    return $self->{conn};
}

# Returns the first true value among the Tharn params listed below, checked
# in that order, or 1 if none of them is set to a true value.
sub _get_timeout_scaling_factor {
    for my $param_name (
        "APISET_TIMEOUT_SCALING_FACTOR",
        "COMMAND_INTERFACE_TIMEOUT_SCALING_FACTOR",
        "FILER_TIMEOUT_SCALING_FACTOR"
      )
    {
        my $factor = Tharn::param($param_name);
        return $factor if $factor;
    }
    return 1;
}

1;

=head1 NAME

NACL::Transit - The Transit library class

=head1 SYNOPSIS

    use NACL::Transit;

=head1 DESCRIPTION

A C<NACL::Transit> object contains data and methods required to perform state
transition operations on the filer/node/vsim

=head1 EXCEPTIONS

=over

=item NACL::Transit::Exceptions::RetriesExceeded

=item NATE::Exceptions::Argument

=back

=head1 CONSTRUCTOR

=head2 new

Returns a reference to a newly created Transit object. Creates a new console
connectrec object internally if required. If a console connectrec object is
provided, then it will be reused.

=over

=item Synopsis

    $obj = NACL::Transit->new( name => $item );

=item Arguments

=over

=item name

(Mandatory) The name or hostobj or connectrec object for the resource on which
transit operations are going to be done. If the connectrec object is provided,
the conntype must be console. An SP APISet object
(NACL::APISet::Node::CLI::SP) is also accepted; its connection is reused.

=item use_sp

(Optional) If true and C<name> is a hostobj for a hardware (non-simulator)
host, the service processor is used as the console mechanism instead of the
regular console connection. Default is 0.

=back

=item Returns

A reference to a new Transit object.

=back

=head1 Methods

=head2 set

This method is used to set the values of parameters that are used in the
methods for getting and changing the state of the service.

=over

=item Synopsis

    $obj->set(%arglist)

=item Arguments

=over

=item %arglist

(Mandatory) A subset of (option name, value) pairs from the set of following
options: (timeout, maxidle, append_output_to, change_state_timeout, maxcycle,
powercycle, iftakenover, restore_system_config, username, password, haltcmd,
transient, probe_timeout, get_state_timeout, conn)

conn is a connectrec object, get_state_timeout and change_state_timeout are
the timeout values for those methods and take values similar to 'timeout'
explained in the documentation for get_state. Changing timeout will change
both get_state_timeout and change_state_timeout.

=back

=item Returns

Nothing on success. An exception is thrown otherwise.

=back

=head2 get

This method is used to get the current values of parameters that are used in
the methods for getting and changing the state of the service.

=over

=item Synopsis

    $obj->get(@arglist)

=item Arguments

=over

=item @arglist

(Optional) A subset of option names from the set of all options that can be
taken by set()

=back

=item Returns

A hash containing the options and their current values

=back

=head2 get_state

This method is used to determine the current state of the service by listening
to the console output.

=over

=item Synopsis

    $obj->get_state([transient=>0], [timeout=>120], [maxidle=>120], [probe_timeout=>[5,120]] )

=item Arguments

=over

=item transient

(Optional) if 0, which is the default, then don't return any of the states
which are transient. Instead wait for a non Transient state. If 1, then return
such a state if seen.

=item timeout

(Optional) how long to wait for something recognizable before giving up.
Default is 120 seconds.

=item maxidle

(Optional) how long to suffer total lack of *any* output by host before giving
up. Default is 120 seconds (and so only interesting if timeout, above, is
increased beyond its default of 120)

=item probe_timeout

(Optional) an array reference. If non-empty, then it lists times at which to
send carriage returns in order to try to get a prompt. The default is at 5
seconds and 120 seconds. If the list runs out, the last value in the list is
used repeatedly. (The 5 second setting is intended to handle the case where
boot_state_delay is called on a console that was already sitting at a prompt
before boot_state_delay was called; leave it out when you know that the
appliance has already been pushed into changing states and should be printing
something recognizable shortly. The 120 second setting is intended to recover,
inefficiently and to the extent that one can, from a prompt never having been
seen because the console output was lost or garbled. Leave it out when the
product does not have bugs that cause this, but unfortunately our product
does: see burt 308673, burt 296403, etc.)

=item append_output_to

(Optional) if given, the value should be a reference to a scalar. The output
of the connection will be appended to this scalar.

=item fake_prompt_retry_timeout

(Optional) Default is 5 sec. Try to get state again if we think prompt return
might be false with fake_prompt_retry_timeout as timeout value.

=back

=item Returns

The current state.

=back

=head2 change_state

This method is used to change the current state of the service to the
specified state.

=over

=item Synopsis

    $obj->change_state(to =>$to, [option1=>value1, [option2=>value2 [,...]]])

=item Arguments

=over

=item to

(mandatory) what state the appliance should go to.

=item timeout

(Optional) How long to wait before reaching the destination state. Default is
600 seconds. (this is a usual NATE timeout: number of seconds, or absolute
time, with -1 meaning infinite).

=item user_defined_states

(Optional) This option can be used to pass user defined states to Transit.
user_defined_states value should be an arrayref like this

    user_defined_states => ["state_1" => "msg_1", . . ., "state_n" => "msg_n" ]

User has to provide message along with the state. Transit will recognize these
states and send whatever message is passed by user as input to filer to move
to the next state.

Examples

    my @user_defined_states = ( "state_1" => "msg_1", . . ., "state_n" => "msg_n" );
    $transit->get_state(%get_state_opts, user_defined_states => \@user_defined_states);
    $transit->change_state(to => $to_state, %change_state_opts, user_defined_states => \@user_defined_states);
    $transit->wait_for_state(wait_for => $wait_for_state, %wait_for_state_opts, user_defined_states => \@user_defined_states);

=item maxidle

(Optional) How long to wait before erroring out because the appliance prints
absolutely nothing for a span of this many seconds. Default is 120 seconds.

=item inhibit_takeover

(Optional) If 1, will inhibit filer from taking over its partner. The default
is 1.

=item maxcycle

(Optional) Maximum number of states to go through before reaching the required
one. The default is -1 ('-1' denotes that the maxcycle will be infinite).

=item powercycle

(Optional) if 1 and if transition fails once, try to cycle power on the host
and then try exactly once more.
The default is the value of param $prefix_POWERCYCLE if it exists, or else the
field POWERCYCLE in the host record for the connection object that is created.
(Note about power cycling: the routine used to cycle power is the "powercycle"
method on the host object for the connectrec object, which in turn is probably
"power_cycle >hostname<" or whatever the "powercmd" field in that host object
is set to. For simulators, a powercycle means to interrupt the simulator
process by typing ctrl-C at the simulator's console)

=item username

(Optional) the user to log in as, if a "login:" prompt appears (USERNAME
state). The default is the value of param $prefix_USERNAME if that exists, or
else the connection's "username" field, which tends to default to "root/admin"
if not overridden.

=item password

(Optional) the password for login, if a "password:" prompt appears (PASSWORD
state). The default is the value of param $prefix_PASSWORD if that exists, or
else the connection's "password" field, which tends to default to the host's
"default_password" field, which defaults to /netapp1! if not overridden.

=item haltcmd

(Optional) the command used to leave UP (or similar) state. The default is
"halt -f" or system node halt -f -ignore-quorum-warnings -node local.

=item from

(Optional) the current state of the appliance. The library tries to guess this
state, and so this option is better left out.

=item iftakenover

(Optional) what to do if this node has been taken over. If "error", then
transit_to fails as soon as it reaches any of the TAKEN_OVER_* states. If
"wait", then wait whenever it sees TAKEN_OVER_* states (useful if the partner
will do giveback soon). If "takeback", then the behavior is as for "wait", but
it also tries to release disk reservations and take back its filesystem
whenever the opportunity arises (useful if the partner will halt or be
reinitialized soon). When waiting, it only waits as long as "maxidle" or
"timeout" options allow.

=item restore_system_config

(Optional) if 1, then option 6 "Update flash from backup config." is selected
at the BOOT_MENU while transitioning to the UP state. The default is 0, and
this selects option 1 at the BOOT_MENU i.e "Normal Boot".

=item fake_prompt_retry_timeout

(Optional) Default is 5 sec. Try to get state again if we think prompt return
might be false with fake_prompt_retry_timeout as timeout value.

=back

=item Returns

The state reached.

=back

=head2 reboot

This method is used to reboot the appliance. If already in an ONTAP state,
transit out of ONTAP to FIRMWARE and then to CLI. Otherwise reach CLI.

=over

=item Synopsis

    $obj->reboot(%arglist)

=item Arguments

=over

=item timeout

(Optional) How long to wait before reaching the destination state. Default is
600 seconds. (this is a usual NATE timeout: number of seconds, or absolute
time, with -1 meaning infinite).

=item maxidle

(Optional) How long to wait before erroring out because the appliance prints
absolutely nothing for a span of this many seconds. Default is 120 seconds.

=item inhibit_takeover

(Optional) If 1, will inhibit filer from taking over its partner. The default
is 1.

=item user_defined_states

(Optional) This option can be used to pass user defined states to Transit.
user_defined_states value should be an arrayref like this

    user_defined_states => ["state_1" => "msg_1", . . ., "state_n" => "msg_n" ]

User has to provide message along with the state. Transit will recognize these
states and send whatever message is passed by user as input to filer to move
to the next state.

Examples

    my @user_defined_states = ( "state_1" => "msg_1", . . ., "state_n" => "msg_n" );
    $transit->get_state(%get_state_opts, user_defined_states => \@user_defined_states);
    $transit->change_state(to => $to_state, %change_state_opts, user_defined_states => \@user_defined_states);
    $transit->wait_for_state(wait_for => $wait_for_state, %wait_for_state_opts, user_defined_states => \@user_defined_states);

=item maxcycle

(Optional) Maximum number of states to go through before reaching the required
one. The default is -1 ('-1' denotes that the maxcycle will be infinite).

=item powercycle

(Optional) if 1 and if transition fails once, try to cycle power on the host
and then try exactly once more. The default is the value of param
$prefix_POWERCYCLE if it exists, or else the field POWERCYCLE in the host
record for the connection object that is created. (Note about power cycling:
the routine used to cycle power is the "powercycle" method on the host object
for the connectrec object, which in turn is probably "power_cycle >hostname<"
or whatever the "powercmd" field in that host object is set to. For
simulators, a powercycle means to interrupt the simulator process by typing
ctrl-C at the simulator's console)

=item username

(Optional) the user to log in as, if a "login:" prompt appears (USERNAME
state). The default is the value of param $prefix_USERNAME if that exists, or
else the connection's "username" field, which tends to default to "root/admin"
if not overridden.

=item password

(Optional) the password for login, if a "password:" prompt appears (PASSWORD
state). The default is the value of param $prefix_PASSWORD if that exists, or
else the connection's "password" field, which tends to default to the host's
"default_password" field, which defaults to /netapp1! if not overridden.

=item haltcmd

(Optional) the command used to leave UP (or similar) state. The default is
"halt -f" or system node halt -f -ignore-quorum-warnings -node local.

=item from

(Optional) the current state of the appliance. The library tries to guess this
state, and so this option is better left out.

=item iftakenover

(Optional) what to do if this node has been taken over. If "error", then
transit_to fails as soon as it reaches any of the TAKEN_OVER_* states. If
"wait", then wait whenever it sees TAKEN_OVER_* states (useful if the partner
will do giveback soon). If "takeback", then the behavior is as for "wait", but
it also tries to release disk reservations and take back its filesystem
whenever the opportunity arises (useful if the partner will halt or be
reinitialized soon). When waiting, it only waits as long as "maxidle" or
"timeout" options allow.

=item restore_system_config

(Optional) if 1, then option 6 "Update flash from backup config." is selected
at the BOOT_MENU while transitioning to the UP state. The default is 0, and
this selects option 1 at the BOOT_MENU i.e "Normal Boot".

=back

=item Returns

'CLI' if it could be reached.

=back

=head2 wait_for_state

This method is used to wait for the filer to reach a certain state. Throws an
exception if timed out.

=over

=item Synopsis

    $obj->wait_for_state(wait_for => $state, [wait_for_state_timeout=>300], [poll_interval=>10], %options_send_to_get_state )

=item Arguments

=over

=item wait_for

(Required) Name of the state to wait for.

=item wait_for_state_timeout

(Optional) Duration to wait for the state to appear on filer's console.
Default is 300 seconds.

=item poll_interval

(Optional) Interval at which to call get_state method to get the current
state. Default is 10 seconds.

=item Returns

Returns wait_for state.

=item other_options

It accepts all the options accepted by get_state method.

=item exceptions

Throws NACL::Transit::Exceptions::Timeout exception if timeout occurred.

=back

=back

=head1 AUTHOR/MAINTAINER

=over

=item NACL Development (dl-nacl-dev@netapp.com)

=back

=cut