#
# Copyright (c) 2011-2013 NetApp, Inc., All Rights Reserved
# Any use, modification, or distribution is prohibited
# without prior written consent from NetApp, Inc.
#

## @summary MCC Interconnect Module
## @author dl-mcc-dpg-qa@netapp.com
## @status shared
## @pod here

package NACL::MTask::MCC::Interconnect;

use strict;
use warnings;
use Carp;

use base qw (NACL::MTask::MTask);
use NATE::BaseException qw(:try);
use NACL::MTask::MCC::Exceptions::Interconnect;
use NATE::Log qw(log_global);
my $Log       = log_global();
my $may_enter = $Log->may_enter();
my $may_exit  = $Log->may_exit();
use Params::Validate
  qw(validate validate_with BOOLEAN HASHREF OBJECT ARRAYREF SCALAR);
use Data::Dumper;
use Scalar::Util qw(weaken isweak);
use List::Util qw(first);
use NACL::MTask::MCC::Log;
use NACL::CS::MetroclusterInterconnectMirror;

# NOTE(review): NACL::MTask::MCC::Utils->dump() is called below, but that
# module is not loaded by this file; presumably it is always loaded by another
# module first -- confirm.

=head1 NAME

NACL::MTask::MCC::Interconnect

=head1 DESCRIPTION

This task is to be used for C-Mode MetroCluster testing.

    # Start test
    use NACL::MTask::MCC::Interconnect;
    ...
    ...
    # Perform test here
    ...
    # End test

=cut

=head1 Public METHODS

Interconnect is used to get the state of metrocluster interconnect and modify
the state as well.  Its primary use is for validation of the interconnect
state at various points in the lifecycle.

Note that DR groups must be created before this is called (new)

=over

=back

=cut

# automatically generate the Non-public constructor (includes a call to 'init')
# and member accessor methods
use Class::MethodMaker [
    new    => [ '-init', 'new' ],
    scalar => [
        { -type => 'NACL::MTask::MCC::Metrocluster' },
        '_metrocluster',
    ],
    scalar => [
        qw/
          _warn_only
          _vsim
          _no_interconnect
          /,
    ],
];

# Initializer invoked by the generated constructor ('-init' above).
#
# Named arguments:
#   metrocluster - required; must be a NACL::MTask::MCC::Metrocluster
#   warn_only    - optional 0/1, defaults to 1; when true, validation failures
#                  are logged as warnings instead of being thrown
#
# Sets the _metrocluster (weakened), _warn_only, _no_interconnect and _vsim
# members.  Croaks if the metrocluster reports no DR groups.
sub init {
    $Log->enter() if $may_enter;
    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => {
            metrocluster => { isa => 'NACL::MTask::MCC::Metrocluster' },
            warn_only    => {
                regex   => qr/^[01]$/,
                default => 1,
            },    #TODO change this to 0 soon
        },
    );

    #TODO before changing warn_only to off by default, make sure vsims
    #are appropriately handled...maybe keep vsims default to warn on or
    #something? (CTL vsims apparently default to turning off mirroring
    #per mandar)

    # set members per the passed constructor arguments.
    $self->_metrocluster( $opts{'metrocluster'} );

    # Hold the metrocluster weakly so this object does not keep it alive.
    weaken( $self->{_metrocluster} );

    # The environment variable forces warn-only mode even when the caller
    # explicitly passed warn_only => 0.
    my $warn = $opts{'warn_only'}
      || $ENV{DISABLE_MCC_INTERCONNECT_STATE_VALIDATION};
    $self->_warn_only($warn);

    # warn only if interconnect validation is disabled.
    if ( $self->_warn_only() ) {
        my $msg = ' MCC interconnect validation in WARN ONLY mode '
          . '(will not throw exceptions)';
        $Log->comment($msg);
    }

    # do no interconnect validation if it is a vsim
    # arbitrary dr group
    my ($dr_group) = values( %{ $self->_metrocluster()->get_DR_groups() } );

    # Fail with an explicit message rather than "Can't call method on an
    # undefined value" when no DR group has been created yet.
    croak 'No DR groups found; DR groups must be created before '
      . __PACKAGE__
      . '->new() is called'
      unless defined $dr_group;

    unless ( $dr_group->is_4_pack() ) {
        $self->_no_interconnect(1);
        $Log->comment('2 node testbed, skip interconnect validation...');
    }
    if ( $dr_group->L_is_vsim_group() ) {
        $self->_vsim(1);
        $Log->comment('vsim testbed');
    }

    $Log->exit() if $may_exit;
}

=head2 is_interconnect_down()

    $interconnect->is_interconnect_down(
        disaster_status => $disaster_status
    );

Returns true if at least one interconnect has 'mirror_oper_status' other than
'online' (compared case-insensitively).
Returns false if this is a Vsim.

** We ignore the FC-VI/AUX states **

=cut

sub is_interconnect_down {
    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => { disaster_status => { type => HASHREF } }
    );

    # Return if vsim-- there is no fabric so consider it up
    $self->_vsim() and return;

    my $interconnects = $self->get_interconnect_state(
        disaster_status => $opts{'disaster_status'} );

    foreach my $site ( keys %{ $interconnects->{'site'} } ) {
        foreach my $state ( @{ $interconnects->{'site'}{$site} } ) {
            if (    uc( $state->partner_type() ) eq 'AUX'
                and uc( $state->type() ) eq 'FC-VI' )
            {
                $Log->comment('Ignoring FC-VI/AUX link states');
                next;
            }

            # Read the status once; compare case-insensitively, matching how
            # the other status checks in this module normalise case.
            my $oper_status = $state->mirror_oper_status();
            $oper_status = '' unless defined $oper_status;
            if ( lc($oper_status) ne 'online' ) {
                $Log->comment( "Mirror oper status is " . $oper_status );
                return 1;
            }
        }
    }
    return;
}

=head2 get_interconnect_state()

    my $state = $interconnect->get_interconnect_state(
        disaster_status  => $disaster_status,
        requested_fields => [ ... ],    # optional
    );

Fetches NACL::CS::MetroclusterInterconnectMirror component state objects for
every site listed in C<< $disaster_status->{live_sites} >> and returns a hash
reference of the form

    { site => { $site => [ <component state objects> ], ... } }

Returns undef when there are no live sites.

=cut

#another sub will massage this into a better structure TODO
sub get_interconnect_state {
    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => {
            disaster_status  => { type => HASHREF },
            requested_fields => {
                type    => ARRAYREF,
                default => [
                    'type',               'node',
                    'partner-name',       'partner-type',
                    'mirror-oper-status', 'adapter',
                    'status'
                ],
            },
        },
    );
    my $disaster_status  = delete( $opts{disaster_status} );
    my $requested_fields = delete( $opts{requested_fields} );
    my @live_sites       = @{ $disaster_status->{live_sites} };
    my $interconnect;

    #loop through clusters to get all interconnect show output for all nodes
    foreach my $site (@live_sites) {
        my $ci =
          $self->_metrocluster->get_Metrocluster_component( site => $site )
          ->command_interface();
        my @ic_states = NACL::CS::MetroclusterInterconnectMirror->fetch(
            command_interface => $ci,
            requested_fields  => $requested_fields,
        );
        $interconnect->{site}->{$site} = \@ic_states;
    }
    $Log->comment(
        ' Interconnect States: ' . NACL::MTask::MCC::Utils->dump($interconnect)
    );
    return $interconnect;
}

# this is a private method to check mcc interconnect status
#
# Named arguments (extra arguments are accepted and ignored):
#   disaster_status - optional hashref; when omitted it is obtained from
#                     $self->_metrocluster()->get_disaster_status(include_HA => 1)
#
# Every non-AUX interconnect entry whose mirror status is not 'online' or
# whose status is not 'up' is classified as either expected (an alternate
# iWarp path is up, or an HA/DR event explains it) or as an error.  The
# outcome is recorded through log_validation_event().  When errors are found,
# NACL::MTask::MCC::Exceptions::Interconnect is thrown unless warn-only mode
# is on, in which case a warning is logged instead.  Returns nothing.
sub validate_metrocluster_interconnect_state {
    my $trace = NACL::MTask::MCC::Log::TRACELOG->start_timer();    #metrics
    my $self  = shift;

    # do nothing if interconnect validation is disabled.
    #if ($ENV{DISABLE_MCC_INTERCONNECT_STATE_VALIDATION}) {
    #my $msg = "DISABLE_MCC_INTERCONNECT_STATE_VALIDATION set!\n" .
    #' MCC interconnect NOT validated!';
    #$Log->warn($msg);
    #return;
    #}

    # do nothing if 2 node (no interconnect)
    # we still check vsims though some vsim setups turn off
    # mirroring so we should only warn for vsims maybe? TODO
    if ( $self->_no_interconnect() ) {

        # do not skip if we tell the test not to skip 2-node validation
        if ( exists $ENV{'DONT_SKIP_2NODE_MCC_VALIDATION'}
            && $ENV{'DONT_SKIP_2NODE_MCC_VALIDATION'} == 1 )
        {
            $Log->comment(
"Not skipping 2-node validation, because DONT_SKIP_2NODE_MCC_VALIDATION=1"
            );
        }
        else {
            my $msg = 'this is a 2 node system w/o interconnect... '
              . 'MCC interconnect NOT validated!';
            $Log->comment($msg);
            $self->_metrocluster->log_validation_event(
                type       => 'interconnect',
                successful => 1,
                summary =>
                  "validate_metrocluster_interconnect_state() skipped"
            );
            return;
        }
    }

    my %opts = validate_with(
        params => \@_,
        spec   => {
            disaster_status => { type => HASHREF, optional => 1 },
        },
        allow_extra => 1
    );
    my $disaster_status = delete( $opts{disaster_status} )
      || $self->_metrocluster()->get_disaster_status( include_HA => 1 );
    my $ic_states =
      $self->get_interconnect_state( disaster_status => $disaster_status );
    my $ic_hash = _build_interconnect_hash($ic_states);

    #$Log->comment("interconnect hash:" .
    #NACL::MTask::MCC::Utils->dump($ic_hash));
    my @all_errors;

    #all HA states for any dr group should be the same, just pick one
    # A caller-supplied disaster_status may carry no HA_states at all; in that
    # case fall back to an empty per-site map instead of using an undefined
    # DR group name as a hash key.
    my $ha_states   = $disaster_status->{HA_states};
    my ($dr_group)  = $ha_states ? keys %{$ha_states} : ();
    my $ha_for_site = defined $dr_group ? $ha_states->{$dr_group} : undef;
    $ha_for_site = {} unless ref $ha_for_site;

    my $disaster_nodes = $self->_metrocluster()
      ->get_disaster_nodes( disaster_status => $disaster_status );
    $Log->comment(
        'disaster nodes:' . join( ',', keys %{$disaster_nodes} ) )
      if scalar keys %{$disaster_nodes};

    #this isn't always valid...heal roots/disaster booted
    #if (my $disaster_site = $disaster_status->{disaster_site}) {
    #$Log->comment(
    #"Skipping interconnect validation for disaster site $disaster_site"
    #);
    #}
    foreach my $site ( keys %{ $ic_states->{site} } ) {
        $Log->comment("Validating interconnect site $site");

        # Sites are assumed to be named 'A' and 'B'.
        my $partner_site = $site eq 'A' ? 'B' : 'A';
        my $ha           = $ha_for_site->{$site};
        my $partner_ha   = $ha_for_site->{$partner_site};

        foreach my $ic_state ( @{ $ic_states->{site}->{$site} } ) {

            #skip aux, we only care about HA and DR
            next if lc $ic_state->{partner_type} eq 'aux';

            my $node         = $ic_state->{node};
            my $partner_name = $ic_state->{partner_name};

            # NOTE(review): the fields requested from the CS layer include
            # 'mirror-oper-status' (read as mirror_oper_status() in
            # is_interconnect_down), yet this code reads the 'mirror_status'
            # key -- confirm that key is actually populated.
            my $mirror_bad = lc $ic_state->{mirror_status} ne 'online';
            my $status_bad = lc $ic_state->{status} ne 'up';

            # Describe whichever of the two states is not healthy.
            my $err =
                "site $site -> $node -> $ic_state->{partner_type}"
              . " -> $ic_state->{adapter} is "
              . join(
                ' and ',
                grep { defined } (
                    ( $mirror_bad ? $ic_state->{mirror_status} : undef ),
                    ( $status_bad ? $ic_state->{status}        : undef ),
                )
              );

            if (    #ignore iwarp path down if another up per node
                   $status_bad
                && !$mirror_bad
                && $ic_hash->{site}->{$site}->{ $ic_state->{node} }->{iwarp}
                ->{up}
              )
            {
                #iwarp/HA only 1 path needed
                $Log->comment(
                    'iWarp connection down, but another HA path is up:'
                      . $err );
                $Log->comment( NACL::MTask::MCC::Utils->dump($ic_state) );
            }
            elsif ( $mirror_bad || $status_bad ) {

                #offline is invalid except for HA or DR in some cases
                #my $err = $self->_build_interconnect_error($ic_state);
                if (    #HA
                       lc $ic_state->{partner_type} eq 'ha'
                    && $ha
                    && lc $ha->{HA_state} ne 'normal'
                    && (   ( $ha->{survivor_node}->get_name() eq $node )
                        || ( $ha->{disaster_node}->get_name() eq $node ) )
                  )
                {
                    $Log->comment(
                        'expected error due to HA event: ' . $err );
                    $Log->comment(
                        NACL::MTask::MCC::Utils->dump($ic_state) );
                }
                elsif (    #DR from partner HA event
                       lc $ic_state->{partner_type} eq 'dr'
                    && $partner_ha
                    && lc $partner_ha->{HA_state} ne 'normal'
                    && $partner_ha->{disaster_node}->get_name() eq
                    $partner_name
                  )
                {
                    $Log->comment(
                        'expected error due to HA event for DR partner: '
                          . $err );
                    $Log->comment(
                        NACL::MTask::MCC::Utils->dump($ic_state) );
                }
                elsif (    #DR
                    lc $ic_state->{partner_type} eq 'dr' &&

                    #do we need to be smarter than this?
                    #or is !normal completely correct?
                    $disaster_status->{DR_state} ne 'normal'
                    && (   $disaster_nodes->{ $ic_state->{partner_name} }
                        || $disaster_nodes->{$node} )
                  )
                {
                    $Log->comment(
                        'expected error due to DR event: ' . $err );
                    $Log->comment(
                        NACL::MTask::MCC::Utils->dump($ic_state) );
                }
                else {
                    $Log->comment("error: $err");
                    $Log->comment(
                        NACL::MTask::MCC::Utils->dump($ic_state) );
                    push @all_errors, $err;
                }
            }
        }
    }

    if ( scalar @all_errors ) {

        #generate more debugging info re: burt 718733, 706765
        $self->_generate_rnic_debugging(
            disaster_status => $disaster_status, );
        my $errmsg = join "\n", ( 'Interconnect Failures:', @all_errors );
        $self->_metrocluster->log_validation_event(
            type       => 'interconnect',
            successful => 0,
            summary    => $errmsg,
            results    => \@all_errors,
        );
        if ( $self->_warn_only() ) {
            $Log->warn("Interconnect warn only mode on:\n$errmsg");
        }
        else {
            NACL::MTask::MCC::Exceptions::Interconnect->throw($errmsg);
        }
    }
    else {
        $Log->comment(' ...Interconnect OK');

        #generate more debugging info re: burt 718733, 706765
        $self->_generate_rnic_debugging(
            disaster_status => $disaster_status, );
        $self->_metrocluster->log_validation_event(
            type       => 'interconnect',
            successful => 1,
            summary =>
              "validate_metrocluster_interconnect_state() successful"
        );
    }
    return;
}

# Private method to generate debugging output
#
# Runs 'rnic status' and 'rnic mcc status' through the CLI apiset of every MCC
# node of every live site.  This is best effort: a NATE::BaseException raised
# by a command is logged and otherwise ignored.
#
# Named arguments:
#   disaster_status - optional hashref; its 'live_sites' list selects the
#                     sites to query.  Nothing is queried when it is omitted.
sub _generate_rnic_debugging {
    my $trace = NACL::MTask::MCC::Log::TRACELOG->start_timer();    #metrics
    my $self  = shift;
    my %opts  = validate_with(
        params => \@_,
        spec   => {
            disaster_status => { type => HASHREF, optional => 1 },
        },
    );
    my $disaster_status = delete( $opts{disaster_status} );
    $Log->comment('Interconnect debugging info...');

    # disaster_status is optional, so do not dereference it blindly.
    my $live_sites = $disaster_status ? $disaster_status->{live_sites} : undef;
    my @commands = ( 'rnic status', 'rnic mcc status', );

    foreach my $site ( @{ $live_sites || [] } ) {
        my $cluster = $self->_metrocluster->get_cluster( site => $site );
        foreach my $mcc_node ( @{ $cluster->L_get_cluster_mcc_nodes() } ) {
            my $node = $mcc_node->get_Node_component();
            foreach my $cmd (@commands) {
                try {
                    # The return value is not used here.
                    my $response =
                      $node->get_7m_or_nodescope_apiset( interface => 'CLI' )
                      ->execute_command( command => $cmd );
                }
                catch NATE::BaseException with {
                    $Log->comment("Ignoring error in debugging call");
                };
            }
        }
    }
}

# Private FUNCTION to build interconnect struct
#
# Takes the structure returned by get_interconnect_state() and returns a hash
# reference indexing every entry several ways (all values are lower-cased
# except the node name):
#
#   {site}{$site}{$node}{$key}{_all}      $key is the entry's partner type,
#                                         mirror status, status or type
#   {site}{$site}{$node}{$a}{$b}          for the pairs partner_type/status,
#                                         mirror/status, type/status,
#                                         partner_type/mirror, status/mirror
#                                         and type/mirror
#   {mirror_status}{$mirror}{$site}
#   {status}{$status}{$site}{$node}
#
# Each leaf is an array reference of the matching entries.
sub _build_interconnect_hash {
    my $trace = NACL::MTask::MCC::Log::TRACELOG->start_timer();    #metrics
    my $ic    = shift;
    my %hash;

    #don't ask, pretty hasssshhhhh....
    foreach my $site ( keys %{ $ic->{site} } ) {
        foreach my $item ( @{ $ic->{site}->{$site} } ) {
            my $partner_type = lc $item->{partner_type};
            my $status       = lc $item->{status};
            my $mirror       = lc $item->{mirror_status};
            my $type         = lc $item->{type};
            my $node         = $item->{node};

            # Shorthand for this node's sub-tree.
            my $by_node = ( $hash{site}->{$site}->{$node} ||= {} );

            push @{ $by_node->{$partner_type}->{_all} }, $item;
            push @{ $by_node->{$mirror}->{_all} },       $item;
            push @{ $by_node->{$status}->{_all} },       $item;
            push @{ $by_node->{$type}->{_all} },         $item;

            push @{ $by_node->{$partner_type}->{$status} }, $item;
            push @{ $by_node->{$mirror}->{$status} },       $item;
            push @{ $by_node->{$type}->{$status} },         $item;

            push @{ $by_node->{$partner_type}->{$mirror} }, $item;
            push @{ $by_node->{$status}->{$mirror} },       $item;
            push @{ $by_node->{$type}->{$mirror} },         $item;

            push @{ $hash{mirror_status}->{$mirror}->{$site} },   $item;
            push @{ $hash{status}->{$status}->{$site}->{$node} }, $item;
        }
    }
    return \%hash;
}

# Private method to build the error message for interconnect failures
#
# Takes one interconnect entry (accessed as a hash) and returns a single
# comma-separated, capitalised sentence naming the node, the partner type,
# and whichever of mirror status / interconnect status is not healthy.
sub _build_interconnect_error {
    my $trace = NACL::MTask::MCC::Log::TRACELOG->start_timer();    #metrics
    my $self  = shift;
    my ($interconn) = @_;
    my @errs;
    push( @errs, "node is $interconn->{'node'}" );
    push( @errs, "partner type is $interconn->{'partner_type'}" );
    if ( lc( $interconn->{'mirror_status'} ) ne 'online' ) {
        push( @errs,
                'mirror status for adapter '
              . "$interconn->{'adapter'} is $interconn->{'mirror_status'}" );
    }
    if ( lc( $interconn->{'status'} ) ne 'up' ) {
        push( @errs,
                'interconnect status for adapter '
              . "$interconn->{'adapter'} is $interconn->{'status'}" );
    }
    return ucfirst( join( ', ', @errs ) );
}

1;