# # Copyright (c) 2013-2017 NetApp, Inc., All Rights Reserved # Any use, modification, or distribution is prohibited # without prior written consent from NetApp, Inc. # ## @summary Node Task Module ## @author dl-nacl-dev@netapp.com ## @status shared ## @pod here package NACL::STask::Node; use strict; use warnings; use base qw(NACL::C::Node NACL::STask::STask); use NATE::Log qw(log_global); my $Log = log_global(); use Params::Validate qw(ARRAYREF HASHREF OBJECT SCALAR validate validate_with :types); my $may_enter = $Log->may_enter(); my $may_exit = $Log->may_exit(); use NATE::Exceptions::Argument qw(:try); use NACL::Exceptions::HighAvailabilityError; use NACL::APISet::Exceptions::TimeoutException; use Data::Dumper; use NACL::C::NetworkPort; use NACL::C::NetworkIpspace; use NACL::C::StorageFailover; use NACL::C::SystemServicesNtpConfig; use NACL::C::SystemNodeDate; use NACL::APISet; use NACL::Transit; use NACL::STask::Cluster; use NACL::C::Cluster; use Scalar::Util qw(blessed); use NACL::STask::StorageFailover; use NACL::Exceptions::VerifyFailure; our %_hostrec_lif_info_verbose_checks_default; =head1 NAME NACL::STask::Node =head1 DESCRIPTION C provides a number of well-defined but potentially complex or multi-step methods related to Node in ONTAP. It builds on top of, and is a derived class of C, and so it also provides methods that are more in the scope of individual Node-related commands. See C for details. This also means that a C object may generally be used in place of a component object. Cleanup can be registered for the following methods Cleanup methods are, Node Method Cleanup Method ---------------------------------------- rename rename =head1 ATTRIBUTES =head2 node (Required) As C. The name of the node, used to identify it uniquely. 
=head1 METHODS =head2 get_partner $node->get_partner( "method-timeout" => $timeout, ); NACL::STask::Node->get_partner( "method-timeout" => $timeout, command_interface => $command_interface, ); (Class or instance method) This method returns the HA partner name. If "-" is returned by the filer, a NACL::Exceptions::HighAvailabilityError exception will be thrown. =over =item Options =over =item C<< command_interface=>$command_interface >> (Required for class method, Not Applicable for instance method) See L =item C<< "method-timeout"=>$timeout >> (Optional) How long in seconds to wait for the component calls to wait. Defaults to 60 seconds. =back =back =head2 get_partner_obj $node->get_partner_obj( "method-timeout" => $timeout, ); NACL::STask::Node->get_partner_obj( "method-timeout" => $timeout, command_interface => $command_interface, ); (Class or instance method) This method returns the HA partner as an STask::Node object. If no partner is found (if HA is not configured) an exception will be thrown. =over =item Options =over =item C<< command_interface=>$command_interface >> (Required for class method, Not Applicable for instance method) See L =item C<< "method-timeout"=>$timeout >> (Optional) How long in seconds to wait for the component calls to wait. Defaults to 60 seconds. =back =back =head2 panic $node->panic(); NACL::STask::Node->panic( command_interface => $command_interface, "method-timeout" => $timeout, nacltask_reboot => 1, #default 1 nacltask_coredump => 0, #default 1 ); (Class or instance method) This method causes a sysctl panic. =over =item Options =over =item C<< nacltask_reboot=>0|1 >> (Optional) If 1 (the default), reboot the node after the panic and bring the Node state to CLI. If 0, do not reboot, and give the control back after the panic has been caused. 
=item C<< nacltask_coredump=>0|1 >> (Optional) If 1 (the default), panic the node with a core dump. If 0, panic the node without a core dump (faster and useful for working with did_bad_happen script) =item C<< "method-timeout"=>$timeout >> (Optional) How long in seconds to wait for the Transit call to reboot. Defaults to 7200 seconds. =item C<< node >> (Optional for Class Method only. Not Applicable for instance method) Specifies the node which has to be panicked. It can be a scalar or object of type NACL::C::CommandInterface::ONTAP. If not specified, the node corresponding to the command_interface is panicked. =item C<< command_interface=>$command_interface >> (Required for class method, Not Applicable for instance method) See L =back =back =head2 get_hostrec_lif_info $node->get_hostrec_lif_info( "method-timeout" => $timeout, default_mtu => "1500", required_fields => [qw( NAME IP MASK GATEWAY PORT )], verbose_checks => 1, ); NACL::STask::Node->get_hostrec_lif_info( "method-timeout" => $timeout, command_interface => $command_interface, default_mtu => "1500", required_fields => [qw( NAME IP MASK GATEWAY PORT )], verbose_checks => 1, ); (Class or instance method) This method retrieves the LIF data from the hostrec structures (which come from the NATE hosts file, or the command-line). This method will default missing data if reasonable defaults are possible, and will check that required fields are all present. 
These are the nate hosts variables that this routine looks for when role == CLUSTER|INTERCLUSTER|DATA: _PORTx _IPx _MASKx _GATEWAYx _MTUx When role == MGMT|CLUSTER_MGMT, it looks for: _PORT _IP _MASK _GATEWAY _MTU Alternatively, also supported is a more compact form that was developed a few years ago, but it is not widely used: _CONFIGx="port=e0a address=172.31.13.18 netmask=255.255.192.0 gateway=172.31.0.1 mtu=1500" If NAME is not specified for a given LIF, the following defaults will be used: role == CLUSTER NAME = "clus".$suffix, where $suffix = 1,2,3,4, etc... role == INTERCLUSTER NAME = "inter_clus".$suffix, where $suffix = 1,2,3,4, etc... role == CLUSTER_MGMT NAME = "cluster_mgmt" role == MGMT NAME = "mgmt" role == anything else NAME = $node."_".( lc $role ).$suffix, where $suffix = 1,2,3,4, etc... If PORT is not specified for a given LIF, the node will be queried for existing ports, and if a port exists of the appropriate role, the ports will be assigned in round-robin fashion to the lifs which are missing PORT specifiers. If any required fields are missing, the following exception will be thrown: L Contained within the text of the exception will be all of the required field checks which have failed. =over =item Return Returns a hash keyed by MGMT,CLUSTER_MGMT,CLUSTER,INTERCLUSTER,DATA. The value of the hash key is an array of hashes, keyed by MTU,NAME,GATEWAY,IP,MASK,PORT Ex: 'CLUSTER' => [ { 'MTU' => '9000', 'NAME' => 'clus1', 'GATEWAY' => '10.225.128.1', 'IP' => '10.225.128.81', 'MASK' => '255.255.248.0', 'PORT' => 'e0a' }, { 'MTU' => '9000', 'NAME' => 'clus2', 'GATEWAY' => '10.225.128.1', 'IP' => '10.225.129.81', 'MASK' => '255.255.248.0', 'PORT' => 'e0b' } ], =back =over =item Options =over =item C<< command_interface=>$command_interface >> (Required for class method, Not Applicable for instance method) See L =item C<< "roles"=>\@array_of_lif_roles >> (Optional) What lif roles do you want to retrieve hostrec info for? 
Possible values are MGMT,CLUSTER_MGMT,CLUSTER,INTERCLUSTER,DATA. If role is not found in hostrec, action taken will depend on the 'if_role_missing' option below. Default is MGMT,CLUSTER_MGMT,CLUSTER,INTERCLUSTER,DATA. =item C<< "if_role_missing"=>ignore|die >> (Optional) What action to take if the requested role is not present in the hostrec? Default is to ignore (i.e. do nothing). =item C<< "default_mtu"=>$default_mtu_value >> (Optional) What value for MTU should be used if none is specified in the NATE hosts file for a given LIF? Default is 1500. =item C<< "default_mtu_per_role"=>\%defaults_keyed_by_role >> (Optional) Same as default_mtu, but allows you to specify different defaults per role. Default is the value of default_mtu for each role. =item C<< "required_fields"=>\@array_of_field_names >> (Optional) If these fields are not defined, or get_hostrec_lif_info() cannot come up with reasonable defaults, a NATE::BaseException will be thrown. While we can default MTU, and possibly look at ports of a given role already configured on a filer, some fields cannot be defaulted (like IP, MASK, GATEWAY), and we might not be able to find an existing port of a given role. In the case where a required field is still unspecified after our best efforts, an exception will be thrown with all of the errors encountered. Default is: NAME,IP,MASK,GATEWAY,PORT,FAMILY =item C<< "required_fields_per_role"=>\%hash_of_field_names_keyed_by_role >> (Optional) This is the same information as required_fields, but allows you to specify fields per role. Default is the same as required_fields for each role. =item C<< "verbose_checks"=>-1|0|1 >> (Optional) Should the results of the get_hostrec_lif_info() checks find their way into the log file as comments? Default (-1) is to only report on these issues the first time get_hostrec_lif_info() is called for a given node. =item C<< "method-timeout"=>$timeout >> (Optional) How long in seconds to wait for the component calls to complete. Defaults to 60 seconds. 
=item C<< apiset_must=>$ruleset >>

(Optional)See L

=item C<< apiset_should=>$ruleset >>

(Optional)See L

=back

=back

=over

=item Exceptions

=over

=item C<NATE::BaseException>

This type of exception is thrown when no ports with the specified role
exist on the node, or when required fields are still missing after
defaulting.

=item C<NACL::Exceptions::HighAvailabilityError>

This type of exception is thrown when HA partner not available.

=back

=back

=cut

# Return the HA partner name of a node, as reported by the storage
# failover record. Throws NACL::Exceptions::HighAvailabilityError when the
# partner is undefined, empty, '-' or 'unknown'. Timeouts raised by the
# fetch are re-thrown unchanged; any other fetch failure is treated as
# "no partner".
sub get_partner {
    $Log->enter() if $may_enter;
    my $me         = (caller(0))[3];
    my $pkg_or_obj = shift;

    my %opts = $pkg_or_obj->_common_validate_with(
        params              => \@_,
        additional_spec     => {node => {type => SCALAR, optional => 1}},
        ignore_primary_keys => 1,
    );

    # An instance is its own command interface; a class call must supply one.
    my $command_interface = delete $opts{command_interface};
    $command_interface = ref($pkg_or_obj) ? $pkg_or_obj : $command_interface;
    my $node = delete $opts{node} || $command_interface->node();

    my %common_opts;
    $pkg_or_obj->_copy_common_component_params(
        source => \%opts,
        target => \%common_opts
    );

    my $partner;
    my $sfo;
    try {
        # StorageFailover appears to have C and 7 mode support
        $sfo = NACL::CS::StorageFailover->fetch(
            command_interface => $command_interface,
            filter            => {node => $node},
            %common_opts
        );
        $partner = $sfo->partner_name();
    } ## end try
    catch NATE::BaseException with {
        my $ex = shift;

        # CLI will return "-", but ZAPI can throw an internal error
        # exception if HA is not enabled
        $Log->comment("$me - NACL::CS::StorageFailover->fetch() threw "
              . ref($ex)
              . " exception,\n"
              . $ex->text());

        ## throw exception in case of timeout
        if ($ex->text() =~ /Timeout waiting for command to complete/i) {
            $ex->throw();
        }
        else {
            $partner = "-";
        }
    };

    # An undefined partner name is handled explicitly so the string
    # comparisons below never see undef.
    if (   !defined($partner)
        || ($partner eq '-')
        || ($partner eq '')
        || ($partner eq 'unknown'))
    {
        $Log->exit() if $may_exit;
        NACL::Exceptions::HighAvailabilityError->throw(
            "HA partner not available",
            node  => $node,
            state => $sfo
        );
    } ## end if ( !defined($partner...

    $Log->exit() if $may_exit;
    return $partner;
} ## end sub get_partner

# Return the HA partner as a NACL::STask::Node object. All options are
# forwarded to get_partner(), whose exceptions propagate to the caller.
sub get_partner_obj {
    $Log->enter() if $may_enter;
    my $pkg_or_obj = shift;
    my %opts       = (@_);

    my $partner = $pkg_or_obj->get_partner(%opts);
    my $ci      = $opts{command_interface} || $pkg_or_obj;

    $Log->exit() if $may_exit;
    return NACL::STask::Node->new(
        command_interface => $ci,
        node              => $partner
    );
} ## end sub get_partner_obj

# Collect LIF definitions for a node from its hostrec, fill in the values
# that can be defaulted (NAME, PORT, MTU, FAMILY and a CLUSTER_MGMT entry
# derived from the MGMT lif), verify the required fields, and return a
# hash reference keyed by role whose values are arrays of per-lif hashes.
# Throws NATE::BaseException when a port cannot be defaulted or when the
# required-field / missing-role checks fail.
sub get_hostrec_lif_info {
    $Log->enter() if $may_enter;
    my $me         = (caller(0))[3];
    my $pkg_or_obj = shift;

    my %orig_opts = validate_with(
        params => \@_,
        spec   => {
            command_interface =>
              {isa => 'NACL::C::CommandInterface', default => undef},
            roles => {
                type    => ARRAYREF,
                default =>
                  [qw( MGMT CLUSTER CLUSTER_MGMT INTERCLUSTER DATA )]
            },
            families => {
                type    => ARRAYREF,
                default => [qw( inet4 inet6 )]
            },
            source => {
                type      => SCALAR,
                default   => "scalar_vars",
                callbacks => {
                    "Value of 'source' should be 'scalar_vars' or 'net_records'"
                      => sub { $_[0] =~ /^(?:scalar_vars|net_records)$/ }
                }
            },
            if_role_missing => {
                type      => SCALAR,
                default   => "ignore",
                callbacks => {
                    "Value of 'if_role_missing' should be 'ignore' or 'die'"
                      => sub { $_[0] =~ /^(?:die|ignore)$/; }
                }
            },
            default_mtu          => {type => SCALAR, default => 1500},
            default_mtu_per_role => {
                type    => HASHREF,
                default => {
                    MGMT         => undef,
                    CLUSTER      => undef,
                    CLUSTER_MGMT => undef,
                    INTERCLUSTER => undef,
                    DATA         => undef,
                    RLM          => undef,
                }
            },
            required_fields => {
                type    => ARRAYREF,
                default => [qw( NAME IP MASK GATEWAY PORT FAMILY )]
            },
            required_fields_per_role => {
                type    => HASHREF,
                default => {
                    MGMT         => undef,
                    CLUSTER      => undef,
                    CLUSTER_MGMT => undef,
                    INTERCLUSTER => undef,
                    DATA         => undef,
                    RLM          => undef,
                }
            },
            verbose_checks => {type => SCALAR, default => -1},
        },
        allow_extra => 1,
    );

    my $cmd_int                 = $orig_opts{command_interface} || $pkg_or_obj;
    my $default_mtu             = 1500;
    my $default_required_fields = [qw( NAME IP MASK GATEWAY PORT FAMILY )];

    my @verify_params = %orig_opts;
    my %opts          = $pkg_or_obj->_common_validate_with(
        params              => \@verify_params,
        additional_spec     => {node => {type => SCALAR, optional => 1}},
        ignore_primary_keys => 1,
        allow_extra         => 1
    );

    my %common_opts;
    $pkg_or_obj->_copy_common_component_params(
        source => \%opts,
        target => \%common_opts
    );

    # Populate the per role default MTU and required fields
    # if not specified.
    foreach my $role (@{$opts{roles}}) {
        if (!defined $opts{default_mtu_per_role}{$role}) {
            if (!defined $opts{default_mtu}) {
                $Log->comment(
                    "No default_mtu is specified for Lif Role $role, "
                      . "defaulting to $default_mtu");
                $opts{default_mtu_per_role}{$role} = $default_mtu;
            }
            else {
                $opts{default_mtu_per_role}{$role} = $opts{default_mtu};
            }
        } ## end if ( !defined $opts{default_mtu_per_role...

        if (!defined $opts{required_fields_per_role}{$role}) {
            if (!defined $opts{required_fields}) {
                $Log->comment(
                    "No required_fields were specified for Lif Role $role, "
                      . "defaulting to @$default_required_fields");
                $opts{required_fields_per_role}{$role} =
                  $default_required_fields;
            }
            else {
                $opts{required_fields_per_role}{$role} =
                  $opts{required_fields};
            }
        } ## end if ( !defined $opts{required_fields_per_role...
    } ## end foreach my $role ( @{ $opts...

    # Roles that were not requested (helper roles, or extra net records)
    # have no per-role entry; fall back to the global default so the MTU
    # is never undefined.
    my $mtu_for_role = sub {
        my ($role) = @_;
        my $mtu = $opts{default_mtu_per_role}{$role};
        $mtu = $opts{default_mtu} if !defined $mtu;
        $mtu = $default_mtu       if !defined $mtu;
        return $mtu;
    };

    # If someone asks for CLUSTER_MGMT, but these are not defined in the
    # hostrec, we will need the MGMT (for last 2 octets of IP) and DATA
    # (for port) roles to build the defaults
    my @requested_roles = @{$opts{roles}};
    my %is_requested    = map { $_ => 1 } @requested_roles;
    my @families        = @{$opts{families}};
    my @roles           = @requested_roles;
    foreach my $helper_role (qw( MGMT DATA )) {
        push(@roles, $helper_role) if !$is_requested{$helper_role};
    }

    # Build the alternation only after the helper roles were added, so
    # their hostrec keys are actually picked up below.
    my $roles = join("|", map { quotemeta } @roles);

    my $hostrec          = $cmd_int->hostrec();
    my $hostrec_filename = $hostrec->{filename};
    my $node             = $cmd_int->node();
    my %lifinfo;

    # Only want the default to be verbose the first time this is called.
    # If caller specified no preference, figure out to default this.
    if ($opts{verbose_checks} < 0) {

        # if we've called this before, turn default off, else turn it on
        $opts{verbose_checks} =
          exists $_hostrec_lif_info_verbose_checks_default{$node} ? 0 : 1;
    } ## end if ( $opts{verbose_checks...

    # turn default off for subsequent calls
    $_hostrec_lif_info_verbose_checks_default{$node} = 0;

    if ($opts{source} eq 'scalar_vars') {

        # Retrieve all host configuration related to lifs
        # NOTE: MGMT_IP or CLUSTER_MGMT_IP don't have a numeric suffix
        my @keys = sort
          grep(/^(?:$roles)_(?:IP|PORT|MTU|MASK|GATEWAY|NAME|CONFIG)\d*$/,
            keys %{$hostrec});

        foreach my $key (@keys) {

            # Note: For MGMT_ or CLUSTER_MGMT, there is no numeric suffix.
            # Capture in list context so a failed match yields undef
            # instead of a stale $1 from an earlier match.
            my ($number) = $key =~ /(\d+)$/;

            # make zero relative; no suffix (or 0) maps to instance 0
            my $instance = $number ? $number - 1 : 0;

            my ($preface) =
              $key =~ /^(\S+)_(?:IP|PORT|MTU|MASK|GATEWAY|NAME|CONFIG)/;

            if ($key =~ /CONFIG/) {

                # Alternate format where the config
                # is more like what is used in CFE
                # Ex: CLUSTER_CONFIG1="port=e0a address=172.31.8.18
                #     netmask=255.255.192.0 gateway=172.31.0.1 mtu=1500"
                foreach my $pair (split(/\s+/, $hostrec->{$key})) {
                    my ($pair_key, $pair_value) = split(/=/, $pair);
                    $pair_key = uc $pair_key;
                    if ($pair_key =~ /ADDRESS/) {
                        $lifinfo{$preface}[$instance]{IP} = $pair_value;
                    }
                    elsif ($pair_key =~ /NETMASK/) {
                        $lifinfo{$preface}[$instance]{MASK} = $pair_value;
                    }
                    else {
                        $lifinfo{$preface}[$instance]{$pair_key} =
                          $pair_value;
                    }
                } ## end foreach my $pair ( split( /\s+/...
            }
            elsif ($key =~ /(MGMT|RLM)_(\S+)/) {
                $lifinfo{$preface}[0]{$2} = $hostrec->{$key};
            }
            elsif ($key =~ /^(INTERCLUSTER|CLUSTER|DATA)_([A-Z]+)(\d+)$/) {
                $lifinfo{$preface}[$instance]{$2} = $hostrec->{$key};
            }
        } ## end foreach my $key (@keys)
    }
    else {
        my %nettype_converter = (
            management         => 'MGMT',
            data               => 'DATA',
            cluster            => 'CLUSTER',
            cluster_management => 'CLUSTER_MGMT',
            intercluster       => 'INTERCLUSTER',
            rlm                => 'RLM',
        );
        my %field_converter = (
            address   => 'IP',
            interface => 'PORT',
            netmask   => 'MASK',
            gateway   => 'GATEWAY',
            name      => 'NAME',
            mtu       => 'MTU',
        );
        my %last_instance;

        foreach my $family (@families) {
            my @networks =
              $hostrec->net_filter(include => {family => $family});

            # I want interfaces sorted by netid
            @networks = sort { $a->{netid} cmp $b->{netid} } @networks;

            foreach my $lif (@networks) {
                my $nettype = $nettype_converter{$lif->{nettype}};
                my $netid   = $lif->{netid};

                # Skip unknown net types and default, default_gx, etc...
                # (The original additionally tested for a "management"
                # nettype with a default netid inside this condition; that
                # test could never be true and has been dropped.)
                next if !($nettype && $netid && ($netid !~ /^default/));

                $last_instance{$nettype} =
                  exists $last_instance{$nettype}
                  ? $last_instance{$nettype} + 1
                  : 0;
                my $instance = $last_instance{$nettype};

                # In the good-ole days of scalar vars there would be
                # only one DATA_IP1, and so the inet4 records which
                # have been converted from scalars will all have
                # netids of inet4_data1, inet4_data2, etc...
                # However, nothing stops us from entering
                # new "net" records of inet4_mydata1 or
                # inet6_data1, so the instance number is not
                # always unique for a given nettype.
                $lifinfo{$nettype}[$instance]{NETID} = $netid;
                foreach my $nfield (keys %$lif) {
                    my $field = $field_converter{$nfield};
                    if ($field) {
                        $lifinfo{$nettype}[$instance]{$field} =
                          $lif->{$nfield};
                    }
                }
            } ## end foreach my $lif (@networks)
        } ## end foreach my $family (@families)
    }

    # If ipspaces change is available, we must use them to determine which
    # ports are appropriate for a given lif role. Unfortunately development
    # chose to continue to return port roles, but not to keep them in sync
    # with ipspace changes, so a returned port role may not be valid.
    # IpSpaces are fairly simple with regard to lif compatibility.
    # cluster lifs must be created on ports in the pre-defined Cluster
    # ipspace and all other lifs can be created on any of the ports
    # associated with the other ipspaces.
    my %ports;
    my $version_manager = $cmd_int->get_version_manager();
    if ($version_manager->has_uichange(uichange => 'ipspaces-ms4')) {
        my @ipspaces = NACL::CS::NetworkIpspace->fetch(
            command_interface => $cmd_int,
            %common_opts
        );
        my %ipspace_ports_by_type = (CLUSTER => [], NONCLUSTER => []);

        foreach my $ipspace (@ipspaces) {

            # The ports are in the form node:port
            # we only want the ports which correspond to
            # this node. The node name is quoted and anchored so that
            # regex metacharacters in it, or another node whose name merely
            # ends with it, cannot produce a false match.
            my @node_ports;
            foreach my $pspec ($ipspace->ports()) {
                if ($pspec =~ /^\s*\Q$node\E:(\S+)/) {
                    push(@node_ports, $1);
                }
            }

            my $type =
              ($ipspace->ipspace() =~ /^CLUSTER$/i)
              ? 'CLUSTER'
              : 'NONCLUSTER';
            push(@{$ipspace_ports_by_type{$type}}, @node_ports);
        } ## end foreach my $ipspace (@ipspaces)

        foreach my $role (@requested_roles) {
            if ($role =~ /^CLUSTER$/i) {
                push(@{$ports{$role}}, @{$ipspace_ports_by_type{CLUSTER}});
            }
            elsif ($role !~ /^RLM$/i) {
                push(@{$ports{$role}},
                    @{$ipspace_ports_by_type{NONCLUSTER}});
            }
        }
    }
    else {

        # Retrieve existing ports on this node in case we need to
        # default missing port definitions. Cluster ports are often
        # missing from the NATE hosts file
        my @node_ports = NACL::CS::NetworkPort->fetch(
            command_interface => $cmd_int,
            filter            => {node => $node},
            %common_opts
        );
        foreach my $entry (@node_ports) {
            my $role = $entry->role();

            # node mgmt role is actually 'node-mgmt'
            if ($role =~ /mgmt/i) {
                $role = "mgmt";
            }
            push(@{$ports{uc $role}}, $entry->port());
        } ## end foreach my $entry (@node_ports)
    }

    # If no ports of these types are found, apply reasonable defaults
    if (!exists $ports{CLUSTER}) {
        $ports{CLUSTER} = [qw( e0a e0b )];
    }
    if (!exists $ports{DATA}) {
        $ports{DATA} = [qw( e0c e0d )];
    }
    if (!exists $ports{MGMT}) {
        $ports{MGMT} = [$ports{DATA}[0]];
    }
    if (!exists $ports{RLM}) {
        $ports{RLM} = [];
    }

    # Go through each LIF definition, and default what we can
    foreach my $role (keys %lifinfo) {
        my $port_index = 0;

        foreach my $instance (0 .. $#{$lifinfo{$role}}) {

            # Holes in the list (e.g. only DATA_IP2 defined) become empty
            # definitions which are then defaulted like any other.
            my $lif = ($lifinfo{$role}[$instance] ||= {});

            my $suffix = "";
            if ($role =~ /^(INTERCLUSTER|CLUSTER|DATA)$/) {
                $suffix = $instance + 1;

                # The user has the choice of picking NAME which will be
                # constructed using gx_boot/deux_init naming rules
                # or NETID which is what was either auto-generated from
                # scalar to net record format, OR just going with the
                # netid from the net record
                if (exists $lif->{NETID} && $lif->{NETID} =~ /(\d+)$/) {
                    $suffix = $1;
                }
            }

            # FAMILY will not be specified if using "scalar_vars"
            # definitions, but would be for "net"
            if (!exists $lif->{FAMILY}) {
                $lif->{FAMILY} = Hostrec::_guess_family($lif->{IP});
            } ## end if ( !exists $lif->{FAMILY...

            # Generally no one ever specifies NAME in a NATE hosts file,
            # but it could be
            if (!exists $lif->{NAME}) {
                my $name;
                if ($role =~ /INTERCLUSTER/i) {
                    $name = "inter_clus" . $suffix;
                }
                elsif ($role =~ /CLUSTER_MGMT/i) {
                    $name = "cluster_mgmt";
                }
                elsif ($role =~ /CLUSTER/i) {
                    $name = "clus" . $suffix;
                }
                elsif ($role =~ /MGMT/i) {
                    $name = "mgmt" . $suffix;
                }
                elsif ($role =~ /DATA/i) {
                    $name = $node . '_' . (lc $role) . $suffix;
                }
                else {
                    $name = (lc $role) . $suffix;
                }
                $lif->{NAME} = $name;
            } ## end if ( !exists $lif->{NAME...

            # If no PORT was specified, try to default
            if (!exists $lif->{PORT}) {

                # Say we have more lif definitions than ports of
                # that type, round-robin the default port assignments
                # i.e. we have 4 data lifs defined, but only 2 data ports
                if (!exists $ports{$role}[$port_index]) {
                    $port_index = 0;
                }

                if (exists $ports{$role}[$port_index]) {
                    if (   $opts{verbose_checks}
                        && $is_requested{$role}
                        && ($role !~ /RLM/i))
                    {
                        $Log->comment(
                            "$me - No NATE host definition for node $node was found for "
                              . uc($role)
                              . "_PORT"
                              . $suffix
                              . ". Defaulting to "
                              . $ports{$role}[$port_index]
                              . ".");
                    } ## end if ( $opts{verbose_checks...
                    $lif->{PORT} = $ports{$role}[$port_index];
                }
                elsif ($role !~ /RLM/i) {

                    # Ports are not applicable for RLM definitions.
                    # If no port and we can't figure out a reasonable
                    # default, throw an exception.
                    NATE::BaseException->throw($role
                          . "_PORT"
                          . $suffix
                          . " is not defined for node $node, and"
                          . " no ports of that role exist on the node"
                          . " to be used as defaults.");
                }
            } ## end if ( !exists $lif->{PORT...

            if (!exists $lif->{MTU}) {
                my $mtu = $mtu_for_role->($role);
                if ($opts{verbose_checks} && $is_requested{$role}) {
                    $Log->comment(
                        "$me - No NATE host definition for node $node was found for "
                          . uc($role) . "_MTU"
                          . $suffix
                          . ". Defaulting to "
                          . $mtu
                          . ".");
                } ## end if ( $opts{verbose_checks...
                $lif->{MTU} = "$mtu";
            } ## end if ( !exists $lif->{MTU...

            $port_index++;
        } ## end foreach my $instance ( 0 ..
    } ## end foreach my $role ( keys %lifinfo)

    if (!defined $lifinfo{CLUSTER_MGMT}) {

        # Look at the MGMT and DATA definitions without autovivifying
        # them: creating empty MGMT/DATA entries here would later be
        # reported as lifs with missing required fields.
        my $mgmt_lif = exists $lifinfo{MGMT} ? $lifinfo{MGMT}[0] : undef;
        my $data_lif = exists $lifinfo{DATA} ? $lifinfo{DATA}[0] : undef;
        my $mgmt_ip  = $mgmt_lif             ? $mgmt_lif->{IP}   : undef;
        my @octets   = defined $mgmt_ip ? split(/\./, $mgmt_ip) : ();

        if (@octets >= 4) {

            # Only if no entry exists in the nate host file
            # 172.16 and the last 2 octets of the MGMT_IP
            $lifinfo{CLUSTER_MGMT}[0] = {
                IP      => "172.16.$octets[2].$octets[3]",
                MASK    => "255.255.255.0",
                GATEWAY => "172.16.$octets[2].$octets[3]",
                PORT    => ($data_lif ? $data_lif->{PORT} : undef),
                NAME    => "cluster_mgmt",
                FAMILY  => "inet4",
                MTU     => "" . $mtu_for_role->('CLUSTER_MGMT'),
            };
            if ($opts{verbose_checks}) {
                $Log->comment(
                    "$me - No NATE host CLUSTER_MGMT definitions for node $node were found. Defaulting to:\n"
                      . Dumper($lifinfo{CLUSTER_MGMT}[0]));
            }
        }
        elsif ($opts{verbose_checks}) {

            # Without a dotted-quad MGMT IP there is nothing sensible to
            # derive; leave CLUSTER_MGMT undefined so the missing-role
            # handling below deals with it.
            $Log->comment(
                "$me - No NATE host CLUSTER_MGMT definitions for node $node were found,"
                  . " and no IPv4 MGMT_IP is available to derive defaults from."
            );
        }
    } ## end if ( !defined $lifinfo...

    # If the user only asked for CLUSTER_MGMT lif info, we added MGMT and
    # DATA to be used in generation of default values, but since the user
    # didn't ask for these, we'll remove them from what we return to them.
    foreach my $role (keys %lifinfo) {
        delete $lifinfo{$role} if !$is_requested{$role};
    }

    my @errors;
    foreach my $role (@requested_roles) {
        next if defined $lifinfo{$role};

        my $msg =
          "No NATE host definitions for node $node were found for requested "
          . uc($role)
          . " lifs.";
        if ($opts{if_role_missing} eq 'die') {
            push(@errors, $msg);
        }
        elsif ($opts{verbose_checks}) {
            $Log->comment($msg);
        }
    } ## end foreach my $role (@requested_roles)

    # Now do final check to see that we have all of the required
    # parameters defined
    foreach my $role (keys %lifinfo) {
        foreach my $instance (0 .. $#{$lifinfo{$role}}) {
            my $suffix = "";
            if ($role =~ /^(INTERCLUSTER|CLUSTER|DATA)$/) {
                $suffix = $instance + 1;
            }

            foreach my $req (@{$opts{required_fields_per_role}{$role}}) {
                next if exists $lifinfo{$role}[$instance]{$req};

                # A missing PORT, and anything on RLM, is tolerated.
                if (($req !~ /PORT/i) && ($role !~ /RLM/)) {
                    push(@errors,
                        "No NATE host definition for node $node was found for "
                          . uc($role) . "_"
                          . $req
                          . $suffix);
                }
            } ## end foreach my $req ( @{ $opts{...
        } ## end foreach my $instance ( 0 ..
    } ## end foreach my $role ( keys %lifinfo)

    if (@errors) {
        NATE::BaseException->throw(CORE::join("\n", @errors)
              . "\n Please check NATE host file: $hostrec_filename.\n");
    }

    $Log->exit() if $may_exit;
    return \%lifinfo;
} ## end sub get_hostrec_lif_info

# Induce a sysctl panic on a node and, unless nacltask_reboot is false,
# drive the node back through recover_from_waiting_for_giveback().
sub panic {
    $Log->enter() if $may_enter;
    my $pkg_or_obj = shift;

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@_,
        additional_spec => {
            node              => {type => SCALAR | OBJECT, optional => 1},
            nacltask_reboot   => {type => BOOLEAN, default => 1},
            nacltask_coredump => {type => BOOLEAN, default => 1},
        },
        ignore_primary_keys => 1,
    );

    my $command_interface = $opts{command_interface} || $pkg_or_obj;
    $command_interface = ref($pkg_or_obj) ? $pkg_or_obj : $command_interface;
    my $node     = delete $opts{node} || $command_interface->node();
    my $reboot   = delete $opts{nacltask_reboot};
    my $coredump = delete $opts{nacltask_coredump};
    my $timeout  = delete $opts{'method-timeout'} || 7200;

    # The node may be given as a name or as an object; keep both forms.
    my ($node_name, $node_obj);
    if (blessed $node) {
        $node_name = $node->node();
        $node_obj  = $node;
    }
    else {
        $node_name = $node;
        $node_obj  = NACL::C::Node->new(node => $node_name);
    }

    # if we are doing a reboot after the panic, we need to
    # determine the partner of this node,
    my $partner_node;
    if ($reboot) {
        try {
            $partner_node = NACL::STask::Node->get_partner_obj(
                "method-timeout"  => $timeout,
                command_interface => $node_obj,
            );
        }
        catch NACL::Exceptions::HighAvailabilityError with {
            $Log->warn('The HA partner of the node '
                  . $node_name
                  . ' is not available. It may be possible that its a single node cluster or the node doesnt have the HA partner'
            );
        };
    }

    # Create an apiset object to issue the panic command.
    my $apiset;
    if ($node_name eq $command_interface->node()) {
        $apiset = $command_interface->apiset(
            category  => 'Node',
            interface => 'CLI',
            set       => 'Systemshell'
        );
    }
    else {
        $apiset = NACL::APISet->new(
            hostobj   => Tharn::host($node_name),
            category  => 'Node',
            interface => 'CLI',
            set       => 'Systemshell',
        );
    }

    # Log the node name: $node itself may be an object reference.
    $Log->trace("Causing a sysctl panic on node $node_name");

    # Configure node to skip core dump on manually induced panic if
    # required. This setting is non-persistent and has effect only on next
    # upcoming panic
    if (!$coredump) {
        $Log->trace('Configuring node to skip core dump on panic');
        $apiset->sysctl(
            name              => 'debug.dont_dump_core',
            value             => 1,
            'privilege-level' => 'root'
        );
    }

    # See burts 752140 and 744259 for background. The "ignore_dbh"
    # is "ignore did_bad_happen" -- which is the cue for did_bad_happen
    # to ignore this panic because it's user-initiated.
    try {
        $apiset->sysctl(
            name  => 'debug.panic',
            value =>
              '"ignore_dbh TEST INDUCED PANIC. DO NOT FILE NEW BURT"',
            'privilege-level'       => 'root',
            'connectrec-timeout'    => 5,
            'connectrec-on_timeout' => sub {
                NACL::APISet::Exceptions::TimeoutException->throw(
                    'Timeout Exception. See BURT 810835');
            }
        );
    }
    catch NACL::APISet::Exceptions::TimeoutException with {
        my $exception = shift;
        $Log->trace('Ignoring expected connection timeout on sysctl panic: '
              . $exception->text());
    }
    catch NATE::BaseException with {
        my $exception = shift;

        # Handle the timeout exception. Re-throw if it's not a timeout
        # exception
        if ($exception->text() =~ /Timeout waiting for command to complete/)
        {
            $Log->trace(
                'Ignoring expected connection timeout on sysctl panic: '
                  . $exception->text());
        }
        else {
            $exception->throw();
        }
    };

    # Reboot the node after the panic if the reboot option is set.
    if ($reboot) {
        my $transit_obj  = NACL::Transit->new(name => $node_name);
        my $transit_opts = {
            'command_interface' => $node_obj,
            'node'              => $node_obj,
            'method-timeout'    => $timeout,
            'partner'           => $partner_node,
            'transit_obj'       => $transit_obj,
        };
        $pkg_or_obj->get_package_name()
          ->recover_from_waiting_for_giveback(%{$transit_opts});
    }

    $Log->exit() if $may_exit;
} # end panic()

=head2 rename

    $node->rename( newname => $string,);

or

    NACL::STask::Node->rename(
        command_interface        => $command_interface,
        node                     => $node,
        newname                  => $string,
        "method-timeout"         => $timeout,
        nacltask_to_cleanup      => 1,
        nacltask_cleanup_manager => $cleanupobj,
    );

(Class or instance method) This method performs the system node rename
command for Cmode filer .

=over

=item Options

=over

=item C<< command_interface=>$command_interface >>

(Required for class method, Not Applicable for instance method) See L

=item C<< nacltask_wait_for_rename=>0|1 >>

(Optional) If true (the default), wait for node rename job completion .
If false, do not wait.

=item C<< nacltask_polling_interval=>$polling_interval >>

(Optional) If nacltask_wait_for_rename is set to 1, this is the interval at
which to poll the filer for the value of the "node" field.

=item C<< method-timeout=>$timeout >>

(Optional) The timeout value for the wait_on_attribute method which waits
until the new value of the node is changed . Default :: 500 .

=item C<< nacltask_to_cleanup => 0|1 >>

(Optional, defaults to 0(not to cleanup)) Flag indicating if this operation
needs to be registered for clean up or not.
=item C<< nacltask_cleanup_manager >>

Cleanup manager to use for registering Default : will use the default
cleanup manager.

=item apiset_must, apiset_should etc.

All of the other various options required for the method are described in
L<< NACL::C::Node->rename | lib-NACL-C-Node-pm/rename >>

=back

=back

=over

=item Exceptions

=over

=item C<NACL::APISet::Exceptions::TimeoutException>

This type of exception is thrown when the rename operation has not happened
within the TIMEOUT value.

=back

=back

=cut

# Rename a node through the base-class rename and, unless
# nacltask_wait_for_rename is false, poll until a node with the new name
# can be found. Throws NACL::APISet::Exceptions::TimeoutException when the
# new name has not appeared within "method-timeout" seconds. Optionally
# registers the reverse rename for cleanup.
sub rename {
    $Log->enter() if $may_enter;
    my $pkg_or_obj = shift;
    my $pkg        = $pkg_or_obj->get_package_name();

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@_,
        additional_spec => {
            nacltask_wait_for_rename  => {type => SCALAR, default => 1},
            "method-timeout"          => {type => SCALAR, default => 500},
            nacltask_polling_interval => {type => SCALAR, default => 20},
            $pkg_or_obj->_cleanup_validate_spec(),
        },
        allow_extra => 1,
    );

    # Transform %opts from the options we received into the options to
    # pass to the base class method.
    my %common_opts;
    my $command_interface = $opts{command_interface};
    my $till_value        = $opts{newname};
    my $wait              = delete $opts{nacltask_wait_for_rename};
    my $timeout           = delete $opts{"method-timeout"};
    my $polling_interval  = delete $opts{nacltask_polling_interval};

    # copying the common parameters for the component call
    $pkg_or_obj->_copy_common_component_params_with_ci(
        source => \%opts,
        target => \%common_opts,
    );

    my ($nacltask_to_cleanup, %opts_for_cleanup);
    $pkg_or_obj->_copy_common_opts_for_cleanup(
        'source'              => \%opts,
        'target'              => \%opts_for_cleanup,
        'nacltask_to_cleanup' => \$nacltask_to_cleanup,
        'to_cleanup'          => 'rename'
    );

    $pkg_or_obj->SUPER::rename(%opts);

    my $end_time = time() + $timeout;
    if ($wait) {

        # Poll until a node carrying the new name shows up.
        while (1) {
            my $renamed = $pkg->find(
                command_interface => $command_interface,
                filter            => {node => $till_value},
                allow_empty       => 1,
                %common_opts,
            );
            last if $renamed;

            if (time() > $end_time) {
                $Log->exit() if $may_exit;

                # Report the configured timeout, not the epoch deadline.
                NACL::APISet::Exceptions::TimeoutException->throw(
                    "The rename has not happened within the TIMEOUT value = $timeout"
                );
            }
            Tharn::snooze($polling_interval);
        }
    }

    if ($nacltask_to_cleanup) {
        $opts_for_cleanup{'new_opts'}  = {'newname' => $opts{'newname'}};
        $opts_for_cleanup{'orig_opts'} = {'newname' => $opts{'node'}};
        $opts_for_cleanup{'node'}      = $opts{'newname'};
        $pkg_or_obj->_register_for_cleanup(%opts_for_cleanup);
    }

    # Single exit point for the normal path, matching the single enter().
    $Log->exit() if $may_exit;
} ## end sub rename

=head2 set_current_datetime

B ONTAP now uses a cluster-wide date with the date synchronized across the
nodes by NTP, so using this method should be unnecessary. If it is really
necessary to change the date/time for CMode, use L.

    NACL::STask::Node->set_current_datetime(
        command_interface => $Node_ci,
        node              => $node_name,
        %common_options
    );

    # Instance method call applicable only for 7Mode.
    $Node_ci->set_current_datetime();

(Class or Instance method) Set the current date and time on the node.

=over

=item Options

=over

=item C<< command_interface => $command_interface >>

(Required for class method, Not Applicable for instance method) See L

=item C<< "node" => $node >>

Node name.

=item C<< apiset_must => $ruleset >>

(Optional) See L

=item C<< apiset_should => $ruleset >>

(Optional) See L

=item C<< 'method-timeout' => $time_in_seconds >>

(Optional) The default NATE timeout is 60 seconds. If the command might take
more time to complete, this option can be used to specify a larger timeout
value. The value should be provided in seconds.

=back

=back

=over

=item Exceptions

=over

=item C

This type of exception is thrown when there is failure while configuring ntp
on a particular node.
=back

=back

=cut

sub set_current_datetime {
    $Log->enter() if $may_enter;
    my ( $pkg_or_obj, %opts ) = @_;

    # An instance call may omit command_interface; take it from the object.
    if ( ref $pkg_or_obj && ( !exists( $opts{command_interface} ) ) ) {
        $opts{command_interface} = $pkg_or_obj->command_interface;
    }

    validate_with(
        params => \%opts,
        spec   => {
            %{ $pkg_or_obj->_common_validate_spec },
            'node' => { type => SCALAR, optional => 1 }
        }
    );

    if ( $opts{command_interface}->is_cmode() ) {
        $Log->warn( 'Usage of NACL::STask::Node::set_current_datetime is '
              . 'deprecated for CMode. Date synchronization is handled '
              . 'by NTP, so this call should no longer be needed' );
    }

    $pkg_or_obj->_sync_clocks(%opts);
    $Log->exit() if $may_exit;
}    ## end sub set_current_datetime

# -----------------------------------------------
# Function: _sync_clocks
#
# Set the date on the node(s) to the current UTC time of the client
# running this routine. NTP is switched off before the date is set
# and switched back on afterwards, except on CMode systems that have
# the 'date-system-node-to-cluster' uichange, where NTP is left alone.
#
# Usage:
#
#   $obj->_sync_clocks( command_interface => $ci );
#
# Parameters:
#
#   command_interface => $ci   (required; read directly from the options)
#   node              => $name (optional; "*" or absent means all nodes)
#   plus the common component options, which are forwarded
#
# Return: Nothing
# -----------------------------------------------
sub _sync_clocks {
    my ( $pkg_or_obj, %opts ) = @_;
    $Log->enter() if $may_enter;

    my $ci = $opts{command_interface};
    if ( $opts{command_interface}->is_cmode() ) {
        $Log->warn( 'Usage of NACL::STask::Node::_sync_clocks is '
              . 'deprecated for CMode. Date synchronization is handled '
              . 'by NTP, so this call should no longer be needed' );
    }

    # $older stays true for 7Mode and for CMode without the cluster-wide
    # date uichange; only then is NTP toggled around the date change.
    my $older = 1;
    my ( @nodes, %common_options );
    $pkg_or_obj->_copy_common_component_params(
        source => \%opts,
        target => \%common_options
    );

    if ( $ci->is_cmode ) {
        $older = 0
          if ( $ci->has_uichange( uichange => 'date-system-node-to-cluster' ) );
        if ($older) {
            if ( defined $opts{node} && $opts{node} ne "*" ) {
                my $node = NACL::STask::Node->new( node => $opts{node} );
                push( @nodes, $node );
            }
            else {
                @nodes = NACL::STask::Cluster->node_objs(
                    command_interface => $ci );
            }
            foreach my $node_obj (@nodes) {
                $node_obj->_set_ntp( enabled => 0, %common_options );
            }
        }
    }
    else {
        $pkg_or_obj->_set_ntp(
            enabled => 0,
            %common_options,
            command_interface => $ci
        );
    }

    # Build the quoted UTC timestamp string passed as 'utcdateandtime'.
    my ( $sec, $min, $hour, $day, $mon, $year, undef ) = gmtime();
    $year += 1900;
    $mon++;
    my $now = sprintf( "\"%4d%02d%02d%02d%02d.%02d\"",
        $year, $mon, $day, $hour, $min, $sec );

    $Log->debug("Attempting to set date to $now on all the nodes ");
    NACL::C::SystemNodeDate->modify(
        'node' => "*",
        %opts,    # User specified 'node' option will override
        'utcdateandtime' => $now
    );

    if ($older) {
        if ( $ci->is_cmode ) {
            foreach my $node_obj (@nodes) {
                $node_obj->_set_ntp( enabled => 1, %common_options );
            }
        }
        else {
            $pkg_or_obj->_set_ntp(
                enabled => 1,
                %common_options,
                command_interface => $ci
            );
        }
    }

    $Log->exit() if $may_exit;
}    ## end sub _sync_clocks

# -----------------------------------------------
# Function: _set_ntp
#
# Enable or disable NTP (Network Time Protocol).
# Used in the clock syncing process.
#
# The modify call is attempted once and, on a
# NACL::APISet::Exceptions::CommandFailedException, retried up to
# three more times with a 5 second pause between attempts. The
# exception from the last attempt is re-thrown.
#
# Usage:
#
#   $obj->_set_ntp( enabled => 0 );
#
# Parameters:
#
#   enabled => [0|1]  (a false or missing value disables NTP)
#   command_interface => $ci (taken from the object on instance calls
#                             when not supplied)
#   Any other options are passed through to
#   NACL::C::SystemServicesNtpConfig->modify.
#
# Return: Nothing
# -----------------------------------------------
sub _set_ntp {
    my ( $pkg_or_obj, %opts ) = @_;
    $Log->enter() if $may_enter;

    if ( ref $pkg_or_obj && ( !exists( $opts{command_interface} ) ) ) {
        $opts{command_interface} = $pkg_or_obj->command_interface;
    }
    if ( $opts{command_interface}->is_cmode() ) {
        $Log->warn(
            'Usage of NACL::STask::Node::_set_ntp is ' . 'deprecated for CMode.' );
    }

    my $action = $opts{enabled} ? "Enabling" : "Disabling";
    $opts{enabled} = $opts{enabled} ? "true" : "false";

    # The catch handler is a closure, so it cannot use last/next/goto to
    # steer the surrounding loop cleanly; it flips $done back to 0 instead.
    my $retries = 3;
    my $done    = 0;
    until ($done) {
        $Log->debug( $action . " NTP service on all nodes" );
        $done = 1;
        try {
            NACL::C::SystemServicesNtpConfig->modify(%opts);
        }
        catch NACL::APISet::Exceptions::CommandFailedException with {
            my $ex = shift;
            if ($retries) {
                $Log->warn( "Failed with error " . $ex->text() );
                $Log->warn("Retrying... ");
                $retries--;
                Tharn::snooze(5);
                $done = 0;
            }
            else {
                $Log->exit() if $may_exit;
                $ex->throw();
            }
        };
    }

    $Log->exit() if $may_exit;
}    ## end sub _set_ntp

=head2 setflags

    $flag_arrref = [{'xxx' => 1},{'yyy' => '2'}];

    NACL::STask::Node->setflags(
        command_interface   => $command_interface,
        "method-timeout"    => $timeout,
        "flags"             => $flag_arrref,
        continue_on_failure => 1
    );

    $NODE_STASK_OBJ->setflags( flags => $flag_arrref );

(Class or Instance method) This Method sets the various flags and their
values on the node.

=over

=item Options

=over

=item C<< command_interface => $command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=item C<< 'method-timeout' => $timeout >>

(Optional) The default NATE timeout is 60 seconds. If the command might
take more time to complete, this option can be used to specify a larger
timeout value. The value should be provided in seconds.

=item C<< "flags" => \@ >>

Flags names and their values as an array reference.

=item C<< continue_on_failure => 1 >>

This boolean value decides whether an invalid flag set will throw a
warning or fails the method. In case if a warning is thrown the other
flags will continue to get executed. Default value is 0.

=back

=back

=over

=item Exceptions

=over

=item C

This type of exception is thrown when we try to set an invalid flag on a
particular node.

=item C

This type of exception is thrown when we try to set an invalid flag value
on a valid flag on a particular node.
=back

=back

=cut

sub setflags {
    $Log->enter() if $may_enter;
    my ( $pkg_or_obj, @args ) = @_;

    # validate the parameters
    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@args,
        additional_spec => {
            flags               => { type => ARRAYREF, optional => 0 },
            continue_on_failure => { type => BOOLEAN,  default  => 0 },
        },
    );

    # Honor the documented 'method-timeout' option by forwarding it to the
    # component call when the caller supplied one (set_bootargs does the
    # same for its component call).
    my %timeout_opt =
      defined $opts{'method-timeout'}
      ? ( 'method-timeout' => $opts{'method-timeout'} )
      : ();

    # Each element of 'flags' is a hashref of flag name => value pairs.
    my @flag_arr = @{ delete $opts{flags} };
    foreach my $flag_hash (@flag_arr) {
        foreach my $key ( keys %{$flag_hash} ) {
            try {
                NACL::C::Node->setflag(
                    command_interface => $opts{command_interface},
                    node              => $opts{command_interface}->node,
                    flag              => $key,
                    value             => $flag_hash->{$key},
                    %timeout_opt,
                );
            }
            catch NACL::APISet::Exceptions::ResponseException with {
                my $exception = shift;

                # With continue_on_failure a bad flag is only logged and
                # the remaining flags are still attempted.
                if ( $opts{continue_on_failure} ) {
                    $Log->warn( 'Failed to set the flag ' . $key
                          . ' with error: '
                          . $exception->text() );
                }
                else {
                    $exception->throw();
                }
            };
        }
    }

    $Log->exit() if $may_exit;
}    ## end sub setflags

=head2 panic_nodes

    @nodes = [node1,node2,...];

    NACL::STask::Node->panic_nodes(
        command_interface => $command_interface,
        nodes             => @nodes,    #default *(all nodes)
        nacltask_coredump => 0,         #default 1
        "method-timeout"  => $timeout,
    );

    $NODE_STASK_OBJ->panic_nodes(nodes => @nodes);
    $NODE_STASK_OBJ->panic_nodes();    # default to all nodes

(Class or instance method) This method causes a system node reboot -dump
true on one or all nodes

=over

=item Options

=over

=item C<< "method-timeout"=>$timeout >>

(Optional) How long in seconds to wait for the Transit call that waits
for the panicking nodes to come back online. Defaults to 7200 seconds.

=item C<< command_interface=>$command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=item C<< nodes => @nodes >>

(Optional) The hostname of the Nodes in ARRAYREF. ARRAYREF should contain
objects of type NACL::C::CommandInterface::ONTAP. By default if no value
is supplied to ARRAYREF of nodes it will be defaulted to all nodes.
=item C<< nacltask_coredump=>0|1 >>

(Optional) If 1 (the default), panic the node with a core dump.
If 0, panic the node without a core dump.

=back

=back

=over

=item Exceptions

=over

=item C<NACL::Exceptions::Timeout>

This type of exception is thrown when the wait for node/nodes to come
online times out.

=back

=back

=cut

sub panic_nodes {
    $Log->enter() if $may_enter;
    my ( $pkg_or_obj, @args ) = @_;

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@args,
        additional_spec => {
            nodes             => { type => ARRAYREF, optional => 1 },
            nacltask_coredump => { type => BOOLEAN,  default  => 1 },
            "method-timeout"  => { type => SCALAR,   default  => 7200 },
        },
        ignore_primary_keys => 1,
    );

    my $command_interface = $opts{command_interface} || $pkg_or_obj;

    # Without an explicit node list, every node of the cluster is targeted.
    my @targets =
      $opts{nodes}
      ? @{ delete $opts{nodes} }
      : NACL::C::Cluster->node_objs( command_interface => $command_interface );

    my $coredump = delete $opts{nacltask_coredump};
    my $timeout  = delete $opts{'method-timeout'};

    # Start one NATE::Process per node, each running panic() for that node,
    # and collect them so they can all be waited on afterwards.
    my @procs;
    foreach my $target (@targets) {
        my %panic_opts = (
            command_interface => $target,
            node              => $target,
            'method-timeout'  => $timeout
        );
        $panic_opts{nacltask_coredump} = 0 if ( !$coredump );

        my $proc = NATE::Process->new(
            codespec => $pkg_or_obj->can('panic'),
            runid    => "panic_node_" . $target->node(),
            args     => [ $pkg_or_obj, %panic_opts ],
            onexit   => \&NATE::Process::on_exit_die_worst_result,
        );
        $proc->start;
        push( @procs, $proc );
    }

    _wait_for_panic_reboot( \@procs, $timeout );
    $Log->exit() if $may_exit;
}

=head2 reboot_nodes

    @nodes = [node1,node2,...];

    NACL::STask::Node->reboot_nodes(
        command_interface => $command_interface,
        nodes             => @nodes,    #default *(all nodes)
        "method-timeout"  => $timeout,
    );

    $NODE_STASK_OBJ->reboot_nodes(nodes => @nodes);
    $NODE_STASK_OBJ->reboot_nodes();    # default to all nodes

(Class or instance method) This method causes a system node reboot on one
or all nodes

=over

=item Options

=over

=item C<< "method-timeout"=>$timeout >>

(Optional) How long in seconds to wait for the Transit call that waits
for the rebooting nodes to come back online. Defaults to 1800 seconds.

=item C<< command_interface=>$command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=item C<< nodes => @nodes >>

(Optional) The hostname of the Nodes in ARRAYREF. ARRAYREF should contain
objects of type NACL::C::CommandInterface::ONTAP. By default if no value
is supplied to ARRAYREF of nodes it will be defaulted to all nodes.

=back

=back

=over

=item Exceptions

=over

=item C<NACL::Exceptions::Timeout>

This type of exception is thrown when the wait for node/nodes to come
online times out.
=back

=back

=cut

sub reboot_nodes {
    $Log->enter() if $may_enter;
    my ( $pkg_or_obj, @args ) = @_;

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@args,
        additional_spec => {
            nodes            => { type => ARRAYREF, optional => 1 },
            "method-timeout" => { type => SCALAR,   default  => 1800 },
        },
        ignore_primary_keys => 1,
        allow_extra         => 1,
    );

    my $command_interface = $opts{command_interface} || $pkg_or_obj;

    # Without an explicit node list, every node of the cluster is targeted.
    my @targets =
      $opts{nodes}
      ? @{ delete $opts{nodes} }
      : NACL::C::Cluster->node_objs( command_interface => $command_interface );

    # 'method-timeout' is left in %opts so it also reaches reboot().
    my $timeout = $opts{'method-timeout'};

    # Start one NATE::Process per node, each running reboot() with that
    # node as both command interface and target.
    my @procs;
    foreach my $target (@targets) {
        my %reboot_opts = (
            %opts,
            command_interface => $target,
            node              => $target,
        );

        my $proc = NATE::Process->new(
            codespec => $pkg_or_obj->can('reboot'),
            runid    => "reboot_filer_" . $target->node(),
            args     => [ $pkg_or_obj, %reboot_opts ],
            onexit   => \&NATE::Process::on_exit_die_worst_result,
        );
        $proc->start;
        push( @procs, $proc );
    }

    _wait_for_panic_reboot( \@procs, $timeout );
    $Log->exit() if $may_exit;
}

=head2 reboot

    $node->reboot();

    NACL::STask::Node->reboot(
        command_interface => $command_interface,
        node              => $node,
        "method-timeout"  => $timeout,
    );

(Class or instance method) This method causes reboot of the node. It then
tries to bring it up using transit->change_state(to => 'CLI'). This method
also tries to recover the node if it is taken over by the partner.

=over

=item Options

=over

=item C<< "method-timeout"=>$timeout >>

(Optional) How long in seconds to wait for the Transit call to reboot.
Defaults to 1800 seconds.

=item C<< node >>

(Optional for Class Method only. Not Applicable for instance method)
Specifies the node which has to be rebooted. It can be a scalar value or
object of type NACL::C::CommandInterface::ONTAP. If not specified, the
node corresponding to the command_interface is rebooted.
=item C<< command_interface=>$command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=back

=back

=cut

sub reboot {
    $Log->enter() if $may_enter;
    my $pkg_or_obj = shift;

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@_,
        additional_spec => {
            node             => { type => SCALAR | OBJECT, optional => 1 },
            "method-timeout" => { type => SCALAR,          default  => 1800 },
        },
        ignore_primary_keys => 1,
        allow_extra         => 1,
    );

    my $command_interface = $opts{command_interface} || $pkg_or_obj;
    my $node    = delete( $opts{node} ) || $command_interface->node();
    my $timeout = $opts{'method-timeout'};

    # 'node' may be given either as an object or as a plain name.
    my ( $node_name, $node_obj );
    if ( blessed $node) {
        $node_obj  = $node;
        $node_name = $node->node();
    }
    else {
        $node_name = $node;
        $node_obj  = NACL::C::Node->new( node => $node_name );
    }

    # Look the HA partner up before rebooting; a missing partner is only
    # logged, and $ha_partner then stays undefined.
    my $ha_partner;
    try {
        $ha_partner = NACL::STask::Node->get_partner_obj(
            "method-timeout"  => $timeout,
            command_interface => $node_obj,
        );
    }
    catch NACL::Exceptions::HighAvailabilityError with {
        $Log->warn( 'The HA partner of the node '
              . $node_name
              . ' is not available, It may be possible that its a single node cluster or the node doesnt have the HA partner'
        );
    };

    ## calling component reboot api
    $opts{node} = $node_obj;
    $pkg_or_obj->SUPER::reboot(%opts);

    my $transit_obj = NACL::Transit->new( name => $node_name );

    ## After the reboot the node may end up in one of two states:
    ## 1. LOADER prompt -> FIRMWARE
    ## 2. Waiting for giveback -> TAKEN_OVER_WAITING_ONTAP
    ## recover_from_waiting_for_giveback handles both.
    my %recover_opts = (
        command_interface => $node_obj,
        'node'            => $node_obj,
        'method-timeout'  => $timeout,
        'partner'         => $ha_partner,
        'transit_obj'     => $transit_obj,
    );
    $pkg_or_obj->get_package_name()
      ->recover_from_waiting_for_giveback(%recover_opts);

    $node_obj->refresh_command_interface();
    $Log->exit() if $may_exit;
}

=head2 recover_from_waiting_for_giveback

    $node->recover_from_waiting_for_giveback(%opts);

    NACL::STask::Node->recover_from_waiting_for_giveback(
        command_interface => $command_interface,
        node              => $node,
        partner           => $partner,
        "method-timeout"  => $timeout,
        transit_obj       => $transit_obj
    );

(Class or instance method) This method causes recovery of the node which
is in giveback state.

=over

=item Options

=over

=item C<< "method-timeout"=>$timeout >>

(Optional) How long in seconds to wait for the Transit call to reboot.
Defaults to 1800 seconds.

=item C<< node >>

(Optional for Class Method only. Not Applicable for instance method)
Specifies the node of type NACL::C::CommandInterface::ONTAP which has to
be recovered from waiting for giveback mode.

=item C<< partner >>

(Optional for Class Method only. Not Applicable for instance method)
Specifies the partner node of type NACL::C::CommandInterface::ONTAP which
has to issue giveback to the node.

=item C<< transit_obj >>

(Optional for Class Method only. Not Applicable for instance method)
Specifies the transit object for the node which is in takeover state.
=item C<< command_interface=>$command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=back

=back

=cut

sub recover_from_waiting_for_giveback {
    $Log->enter() if $may_enter;
    my $pkg_or_obj = shift;

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@_,
        additional_spec => {
            node             => { type => OBJECT | SCALAR, optional => 1 },
            partner          => { type => OBJECT | UNDEF,  optional => 1 },
            "method-timeout" => { type => SCALAR,          default  => 1800 },
            'transit_obj'    => {
                isa      => 'NACL::Transit',
                type     => OBJECT,
                optional => 1
            },
            nacltask_skip_auto_giveback => { type => SCALAR, default => 1 },
            'require-partner-waiting'   =>
              { type => SCALAR | UNDEF, default => "false" },
            'override-vetoes' => { type => SCALAR | UNDEF, default => "true" },
            nacltask_verify   => { type => SCALAR,         default => 0 },
        },
        allow_extra => 1
    );

    my $command_interface = delete $opts{command_interface};
    my $node = delete( $opts{node} ) || $command_interface->node();

    # 'node' may be given either as an object or as a plain name.
    my ( $node_name, $node_obj );
    if ( blessed $node) {
        $node_obj  = $node;
        $node_name = $node->node();
    }
    else {
        $node_name = $node;
        $node_obj  = NACL::C::Node->new( node => $node_name );
    }

    my $partner = delete $opts{partner};
    my $timeout = delete $opts{'method-timeout'};
    my $transit_obj =
      delete( $opts{transit_obj} ) || NACL::Transit->new( name => $node_name );

    # Best effort: the node may pass through FIRMWARE on its way up; any
    # error while checking for that intermediate stage is ignored.
    try {
        $transit_obj->wait_for_state( 'wait_for' => 'FIRMWARE' );
    }
    otherwise {
        # suppress any error while checking for possible intermediate stage
    };

    my $state;
    try {
        $transit_obj->change_state( to => 'CLI', timeout => $timeout );
    }
    catch NACL::Transit::Exceptions::TransitException with {
        my $exception = shift;
        if ( $exception->text =~ m[partner has taken over]i ) {
            $state = $transit_obj->get_state( timeout => $timeout );

            ## If the node is in takeover state, the partner command interface
            ## is needed to issue the force giveback operation back to the node.
            ## NOTE(review): 'partner' may be undef here (the spec allows it);
            ## confirm giveback() reports that case clearly.
            if ( $state eq "TAKEN_OVER_WAITING_ONTAP" ) {
                my %giveback_opts = (
                    command_interface => $partner,
                    node              => $partner,
                    partner           => $node_obj,
                    'method-timeout'  => $timeout,
                );

                # Remaining validated options (giveback tuning flags and any
                # extras) are passed through after the fixed ones.
                NACL::STask::StorageFailover->giveback( %giveback_opts,
                    %opts );
            }
        }
        else {
            # Any other transit failure is not ours to handle.
            $Log->exit() if $may_exit;
            $exception->throw();
        }
    };

    $transit_obj->change_state(
        to          => 'CLI',
        iftakenover => "wait",
        timeout     => $timeout
    );
    $node_obj->refresh_command_interface();
    $Log->exit() if $may_exit;
}

# Wait for each process in the given arrayref, passing $timeout to every
# wait() call. A wait error whose text matches "Timed out" is turned into
# NACL::Exceptions::Timeout; any other error is re-thrown as
# NATE::BaseException. The first error stops the loop.
# NOTE(review): NACL::Exceptions::Timeout is not among the modules loaded
# at the top of this file in the visible part - confirm it is loaded
# elsewhere.
sub _wait_for_panic_reboot {
    my ( $procs, $timeout ) = @_;

    foreach my $proc ( @{$procs} ) {
        eval { $proc->wait( timeout => $timeout ); };
        my $error = $@;
        next unless $error;

        if ( $error =~ /Timed out/ ) {
            NACL::Exceptions::Timeout->throw(
                    'node failed to come online and timed out '
                  . $timeout
                  . ' seconds' );
        }
        else {
            NATE::BaseException->throw($error);
        }
    }
}

=head2 recover_from_quorum_errors

    $node->recover_from_quorum_errors(%opts);

    NACL::STask::Node->recover_from_quorum_errors(
        command_interface => $command_interface,
        partner           => $partner,
        "method-timeout"  => $timeout,
    );

    $node_obj->recover_from_quorum_errors(partner => $partner);

(Class or instance method) This method causes recovery of the node which
is in out-of-quorum state.

=over

=item Options

=over

=item C<< "method-timeout"=>$timeout >>

(Optional) How long in seconds to wait for the Transit call to reboot.
Defaults to 1800 seconds.

=item C<< partner >>

(Optional for Class Method only. Not Applicable for instance method)
Specifies the partner node of type NACL::C::CommandInterface::ONTAP which
has to issue giveback to the node.
=item C<< command_interface=>$command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=back

=back

=cut

sub recover_from_quorum_errors {
    $Log->enter() if $may_enter;
    my $pkg_or_obj = shift;

    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@_,
        additional_spec => {
            partner          => { type => OBJECT },
            "method-timeout" => { type => SCALAR, default => 1800 },
        },
        allow_extra => 1
    );

    my $command_interface = delete $opts{command_interface};
    my $partner_node      = delete $opts{partner};
    my $timeout           = delete $opts{'method-timeout'};

    # The kernel-service view is fetched through the partner; every entry
    # reporting out-of-quorum with an unknown operational status is
    # recovered in turn.
    require NACL::CS::ClusterKernelService;
    my @kernel_service_states = NACL::CS::ClusterKernelService->fetch(
        command_interface => $partner_node );

    foreach my $entry (@kernel_service_states) {
        next
          unless ( ( $entry->status_quorum() =~ /out-of-quorum/ )
            && ( $entry->status_oper() =~ /unknown/ ) );

        my $node_name = $entry->cluster_node();
        my $node_obj  = NACL::C::Node->new( node => $node_name );
        my $shell     = $node_obj->get_systemshell_apiset();

        ## steps to solve the quorum errors as discussed in burt#851395
        $shell->kenv(
            'unset'           => 1,
            'persist'         => 1,
            variable          => "bootarg.init.boot_recovery",
            'privilege-level' => "root"
        );
        $shell->rm(
            'paths'           => "/mroot/etc/cluster_config/monitor_mroot.nvfail",
            'privilege-level' => "root"
        );
        $shell->reboot( 'privilege-level' => "root" );

        ## recovering node after reboot
        ## calling method recover_from_waiting_for_giveback
        my $transit_obj  = NACL::Transit->new( name => $node_name );
        my %recover_opts = (
            command_interface => $node_obj,
            'node'            => $node_obj,
            'method-timeout'  => $timeout,
            'partner'         => $partner_node,
            'transit_obj'     => $transit_obj,
        );
        $pkg_or_obj->get_package_name()
          ->recover_from_waiting_for_giveback(%recover_opts);

        $node_obj->refresh_command_interface();
        $node_obj->verify_state( 'health' => "true" );
    }

    $Log->exit() if $may_exit;
}

=head2 set_bootargs

    $flag_arrref = [{'xxx' => 1},{'yyy' => '2'}];

    NACL::STask::Node->set_bootargs(
        node                => $node,
        command_interface   => $command_interface,
        "method-timeout"    => $timeout,
        "flags"             => $flag_arrref,
        continue_on_failure => 1
    );

    $NODE_STASK_OBJ->set_bootargs( flags => $flag_arrref );

(Class or Instance method) This Method sets the various flags and their
values on the node.

=over

=item Options

=over

=item C<< node => $node >>

(Required for class method, Not Applicable for instance method)
See L

=item C<< command_interface => $command_interface >>

(Required for class method, Not Applicable for instance method)
See L

=item C<< 'method-timeout' => $timeout >>

(Optional) The default NATE timeout is 1800 seconds. If the command might
take more time to complete, this option can be used to specify a larger
timeout value. The value should be provided in seconds.

=item C<< "flags" => \@ >>

Flags names and their values as an array reference.

=item C<< continue_on_failure => 1 >>

This boolean value decides whether an invalid flag set will throw a
warning or fails the method. In case if a warning is thrown the other
flags will continue to get executed. Default value is 0.

=back

=back

=over

=item Exceptions

=over

=item C

This type of exception is thrown when we try to set an invalid flag on a
particular node.

=item C

This type of exception is thrown when we try to set an invalid flag value
on a valid flag on a particular node.
=back

=back

=cut

sub set_bootargs {
    $Log->enter();
    my ( $pkg_or_obj, @args ) = @_;

    # validate the parameters
    my %opts = $pkg_or_obj->_common_validate_with(
        params          => \@args,
        additional_spec => {
            flags               => { type => ARRAYREF },
            continue_on_failure => { type => BOOLEAN, default => 0 },
            "method-timeout"    => { type => SCALAR,  default => 1800 },
        },
    );

    my $timeout   = delete $opts{'method-timeout'};
    my $flag_list = delete $opts{flags};

    # Each element of 'flags' is a hashref of bootarg name => value pairs;
    # every pair results in one bootargs_set call.
    for my $bootargs ( @{$flag_list} ) {
        for my $arg_name ( keys %{$bootargs} ) {
            try {
                NACL::C::Node->bootargs_set(
                    command_interface => $opts{command_interface},
                    node              => $opts{command_interface}->node,
                    'method-timeout'  => $timeout,
                    arg               => $arg_name,
                    value             => $bootargs->{$arg_name}
                );
            }
            catch NACL::APISet::Exceptions::ResponseException with {
                my $exception = shift;

                # With continue_on_failure a bad bootarg is only logged and
                # the remaining ones are still attempted.
                if ( !$opts{continue_on_failure} ) {
                    $exception->throw();
                }
                else {
                    $Log->warn( 'Failed to set the bootarg flag '
                          . $arg_name
                          . ' with error: '
                          . $exception->text() );
                }
            };
        }
    }

    $Log->exit();
}    ## end sub set_bootargs

=head2 set_sktrace

    $node->set_sktrace(%opts);

    NACL::STask::Node->set_sktrace(
        nodes           => $nodes,
        trace_value_map => \%trace_value_map,
        nacltask_verify => $nacltask_verify,
    );

    $node_obj->set_sktrace(nodes => $nodes,trace_value_map => $trace_value_map);

(Class or instance method) This method can be used to enable/disable
various sktrace variables on one or more nodes.

=over

=item Options

=over

=item C<< nodes >>

(Optional) Either a single NACL object or arrayref of NACL objects from
which the node name can be derived. On specified nodes various sktrace
variables are enabled. default: local-node

=item C<< trace_value_map >>

(Required) Has hashref of sktrace values and corresponding values as key
value pairs.

=item C<< nacltask_verify => 1|0 >>

(Optional) This option is used to verify whether the particular value is
enabled/set for the node. The default value is 0.
=item C<< continue_on_failure => 1 >>

This boolean value decides whether an invalid sk_trace set will throw a
warning or fails the method. In case if a warning is thrown the other
sk_traces will continue to get executed. Default value is 0.

=back

This is a node level command and hence it will switch on the tracing for
all the variables specified on that particular node.

=back

=cut

sub set_sktrace {
    $Log->enter();
    my ( $pkg_or_obj, @args ) = @_;

    my %opts = Params::Validate::validate(
        @args,
        {
            nodes => { type => ARRAYREF | OBJECT, default => $pkg_or_obj },
            trace_value_map     => { type => HASHREF },
            nacltask_verify     => { type => SCALAR,  default => 0 },
            continue_on_failure => { type => BOOLEAN, default => 0 },
        },
    );

    # Normalise 'nodes' (single object or arrayref of objects) into a list
    # of command interfaces.
    my @all_nodes;
    if ( ref( $opts{nodes} ) eq "ARRAY" ) {
        foreach my $node_element ( @{ $opts{nodes} } ) {
            push( @all_nodes, $node_element->command_interface() );
        }
    }
    else {
        push( @all_nodes, $opts{nodes}->command_interface() );
    }

    my $trace_value_map = $opts{trace_value_map};
    my $verify          = delete $opts{nacltask_verify};

    foreach my $node (@all_nodes) {
        my $shell = $node->get_systemshell_apiset();

        foreach my $trace ( keys %$trace_value_map ) {
            try {
                $shell->sysctl(
                    'name' =>
                      "sysvar.sktrace.${trace}_enable=$trace_value_map->{$trace}",
                    'privilege-level' => "root",
                );
            }
            catch NACL::APISet::Exceptions::ResponseException with {
                my $exception = shift;
                if ( $opts{continue_on_failure} ) {
                    $Log->warn( 'Failed to set the SkTrace flag '
                          . ${trace}
                          . ' with error: '
                          . $exception->text() );
                }
                else {
                    $exception->throw();
                }
            };

            next unless $verify;

            # Read the variable back and compare it with the requested value.
            my $response = $shell->sysctl(
                'name'            => "sysvar.sktrace.${trace}_enable",
                'privilege-level' => "root",
            );
            my $output = $response->get_processed_output();

            # Start from an empty string so that output which does not
            # contain the variable is reported as a mismatch without
            # "uninitialized value" warnings. The variable name is matched
            # literally (dots escaped, $trace quoted).
            my $found_value = '';
            if ( $output =~ /sysvar\.sktrace\.\Q$trace\E_enable:\s+(.*)/ ) {
                $found_value = $1;
            }

            next if $trace_value_map->{$trace} eq $found_value;

            my $message =
                "Verification Failed!: For node "
              . $node->name()
              . "\nSKTrace variable: "
              . ${trace}
              . "\nFound value: "
              . $found_value
              . "\nExpected value: "
              . $trace_value_map->{$trace} . "\n";

            if ( $opts{continue_on_failure} ) {
                $Log->warn($message);
            }
            else {
                $Log->exit();
                NACL::Exceptions::VerifyFailure->throw($message);
            }
        }
    }

    $Log->exit();
}    ## end set_sktrace

1;