## Copyright (c) 2001-2017 NetApp, Inc., All Rights Reserved
##   Any use, modification, or distribution is prohibited
##   without prior written consent from NetApp, Inc.
##
#

## @summary HA Task Module
## @author rajaram@netapp.com,dl-ha-qa@netapp.com
## @status shared
## @pod here

=head1 NAME

NACL::MTask::HA

=head1 DESCRIPTION

This task is used to initiate storage failover takeover and storage failover giveback,
and to verify that the relevant EMS events and ASUPs are generated, that the nodes are in
the appropriate states, that the aggregates are online and that disk ownership is as expected.

Apart from validating the states of the nodes after Takeover/Giveback (done by NACL::STask::StorageFailover),
the task performs the following verifications additionally ::
checking for appropriate ems, asup, takeover/giveback duration, disk ownership changes, aggregate states, etc.

If users want to do only a basic check of the state of the nodes after a takeover/giveback
then they can use the StorageFailover STask (NACL::STask::StorageFailover).

This MTask will in turn be calling the StorageFailover STask methods wherever necessary


    # Invoking a storage failover takeover.
    use NACL::MTask::HA;
    ...
    my $Node1 = NACL::MTask::HA->new(
                command_interface => $node,
        );
        $Node1->takeover(type=>"reboot", nacltask_verify_aggr_state_owner => 1);
    ...
    # Perform any operations here
    ...
        $Node1->giveback('override-vetoes'=> "true", 'require-partner-waiting'=> "true",
                          nacltask_verify_ems => 1);

    # End test

=cut

=head1 ATTRIBUTES

If users want to create an HA object when partner is down but the actual cross
connect is present check the below example

    use NACL::MTask::HA;
    ...
    my $Node1 = NACL::MTask::HA->new(
                command_interface => $node,
                verify_cross_connect => "0",
                reset_flags => "0"
       );
    ...
    # Perform any operations here
    ...
    $Node1->giveback();
    # End test

=head2 node

The node on which the takeover/giveback will be initiated

=cut

package NACL::MTask::HA;

use strict;
use warnings;

use base qw(NACL::MTask::MTask);

use NATE::Log qw(log_global);
my $Log = log_global();
my $may_enter = $Log->may_enter();
my $may_exit  = $Log->may_exit();
use NATE::Process;
use Params::Validate
  qw(validate validate_with BOOLEAN HASHREF OBJECT ARRAYREF SCALAR);
use NACL::Exceptions::OperationFailed;
use NACL::Exceptions::EventCheckFailure qw(:try);
use NACL::Exceptions::UnexpectedState qw(:try);
use NACL::APISet::Exceptions::ResponseException qw(:try);
use NATE::Exceptions::Argument;
use NACL::STask::Node;
use NACL::C::StorageAggregate;
use NACL::C::Volume;
use NACL::STask::StorageDisk;
use NACL::STask::StorageFailover;
use NACL::MTask::EventLogDetector;
use NACL::C::EventLog;
use NACL::CS::StorageFailover;
use NACL::MTask::StorageAggregateRelocation;
use NACL::C::SystemServicesNtpConfig;
use NACL::C::EventConfig;
use NACL::MTask::SystemLogDetector;
use NACL::ComponentUtils qw(Dumper);
use NACL::Exceptions::EventCheckFailure qw(:try);
use NACL::C::Exceptions::TimeNotSynchronised qw(:try);
use NACL::APISet::Exceptions::TimeoutException qw(:try);
use NACL::GeneralUtils qw(nacl_method_retry);
use NACL::C::Exceptions::StorageAggregate::ZsmConnectionError qw(:try);
use NATE::STAF::Peer qw($Parent);
use NATE::Events qw(listener_add listener_remove eventloop);
use NACL::CS::ClusterDate;
use NACL::C::ClusterDate;
use NACL::MTask::Exceptions::HAGatherDiagException;
use Storable qw(freeze thaw);

# Max duration within which a takeover or Giveback is expected to complete
# NOTE(review): the unit is presumably seconds, like the timeouts below - confirm.
use constant MAX_TO_GB_DURATION => 60;
# Default value of the 'polling_interval' option of takeover (documented there
# as being expressed in seconds).
use constant POLL_INTERVAL      => 10;
# Timeout for aggr to come online
use constant AGGR_ONLINE_TIMEOUT => 40;

# SFO aggregate outage timeout
# burt826198 changed from 60 -> 120
# For a FANTA takeover this value is multiplied by the number of online SFO
# aggregates to obtain the aggregate relocation verification timeout.
use constant SFO_AGGR_ONLINE_TIMEOUT => 120;

# CFO aggregate outage timeout
use constant CFO_AGGR_ONLINE_TIMEOUT => 60;

# After giveback node takes about 400 seconds to reach SF_UP
# NOTE(review): the value below is 320 while the comment above says about
# 400 - confirm which of the two is intended.
use constant WFG_TO_SF_UP_TIMEOUT => 320;

# Timeout for failover state to become 'In takeover'
# after takeover operation. For FANTA, timeout is
# (No. of online aggrs) * SFO_AGGR_ONLINE_TIMEOUT.
# Otherwise it is TAKEOVER_TIMEOUT.
use constant TAKEOVER_TIMEOUT => 120;

# NOTE(review): not referenced in the visible part of the file; presumably a
# margin applied on top of a polling interval - confirm against its users.
use constant POLL_DELTA => 5;

# NOTE(review): not referenced in the visible part of the file; presumably
# detection times for MetroCluster IP and non-MetroCluster-IP configurations
# - confirm the meaning and the unit against their users.
use constant MCC_IP_DETECTION_TIME      => 20;
use constant NON_MCC_IP_DETECTION_TIME  => 15;

=head1 METHODS

=head2 new

my $Node1 = NACL::MTask::HA->new(
    node => $node,
);

Create an HA object that can be used to initiate takeover or giveback
and perform the relevant verifications.

=over

=item Options

=over

=item C<< node => $node >>

(Required, isa NACL::C::Node)
The node, which is also used as the command interface, on which to initiate takeover/giveback

=item C<< reset_flags => $boolean >>

(Optional, Default value is 1)
Setting this option will reset the HA flags to their default values upon
object creation

=item C<< verify_cross_connect => $boolean >>

(Optional, Default value is 1)
Setting this option checks that the disk shelves are cross-connected and also
sets the storage failover mode to ha

=back

=back

=cut

# Generate the constructor and the attribute accessors of the class.
use Class::MethodMaker [
    # Constructor 'new', built with the -hash and -init options; the init()
    # method of this class is the initialisation hook it relies on.
    new    => [ '-hash', '-init', 'new' ],
    # Node object on which takeover/giveback is initiated (validated in
    # init_executed as a NACL::C::Node).
    scalar => 'node',
    # Boolean constructor option: reset the HA flags during initialisation.
    scalar => 'reset_flags',
    # Names of the online 'sfo' aggregates whose home is the partner node;
    # filled in by init_executed.
    array  => '_partner_online_sfo_aggrs',
    # Name of an online 'cfo' aggregate whose home is the partner node;
    # filled in by init_executed (the last matching aggregate wins).
    scalar => '_partner_cfo_aggr',
    # Hash reference of the form { aggregate => { volume => { vserver, state } } }
    # for the volumes on the partner's online SFO aggregates; filled in by
    # init_executed.
    scalar => '_partner_sfo_aggr_volumes',
    # Boolean constructor option: check the disk shelf cross-connect and
    # enable storage failover during initialisation.
    scalar => 'verify_cross_connect',
    # Aggregate relocation MTask object created for a FANTA takeover; the
    # accessor is declared with -type NACL::MTask::StorageAggregateRelocation.
    scalar => [
        { -type => 'NACL::MTask::StorageAggregateRelocation' },
        'arl_mtask_obj',
    ],
];

# Initialisation hook of the constructor.
# Hands the constructor arguments to _execute_with_gather_diag(), asking it to
# run init_executed() and naming HAGatherDiagException as the exception class.
# Returns whatever _execute_with_gather_diag() returns.
sub init {
    my $self      = shift;
    my %init_args = @_;

    return $self->_execute_with_gather_diag(
        method      => 'init_executed',
        exception   => 'NACL::MTask::Exceptions::HAGatherDiagException',
        method_args => \%init_args,
    );
}

# Performs the actual object initialisation on behalf of init().
#
# Options (validated below):
#   node                 - NACL::C::Node object (required)
#   reset_flags          - boolean, default 1
#   verify_cross_connect - boolean, default 1
#
# Steps, in order:
#   1. when verify_cross_connect is set: check the shelf cross-connect and
#      enable storage failover (re-applying auto-giveback "false" when it was
#      off beforehand and the flags are not being reset);
#   2. when reset_flags is set: reset the HA flags of the node and its partner;
#   3. issue a giveback when the partner state matches 'partial_giveback';
#   4. record the partner's online SFO/CFO aggregates and the volumes that
#      live on the online SFO aggregates;
#   5. turn event suppression off.
#
# A NACL::APISet::Exceptions::ResponseException raised by the cross-connect
# check is logged and re-thrown with a fixed message.
sub init_executed {
    $Log->enter() if $may_enter;
    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => {
            node                 => { type => OBJECT,  isa     => "NACL::C::Node" },
            reset_flags          => { type => BOOLEAN, default => 1 },
            verify_cross_connect => { type => BOOLEAN, default => 1 },
        }
    );

    my %common_opts;
    $self->_move_common_component_params(
        source => \%opts,
        target => \%common_opts
    );

    my $node = $opts{node};
    my $partner =
      NACL::STask::Node->get_partner_obj( command_interface => $node );
    my $node_name    = $node->node();
    my $partner_name = $partner->node();

    if ( $opts{verify_cross_connect} ) {

        # Both nodes must see the disk shelves before HA can be relied upon.
        $Log->comment(
            "Check if disk shelves are cross-connected to the filers");
        try {
            NACL::STask::StorageDisk->check_cross_connect(
                command_interface => $node,
                node1             => $node_name,
                node2             => $partner_name,
                %common_opts,
            );
        }
        catch NACL::APISet::Exceptions::ResponseException with {
            my ($response_error) = @_;
            $Log->comment( "Failed to check cross connect, "
                  . $response_error->text() );
            NACL::APISet::Exceptions::ResponseException->throw(
                "Failed to check cross connect");
        };

        # When the flags are not going to be reset, remember whether
        # auto-giveback was off so that it can be re-applied after the enable.
        my $reapply_auto_giveback_off = 0;
        if ( !$opts{reset_flags} ) {
            my $failover_info = NACL::CS::StorageFailover->fetch(
                command_interface => $node,
                filter            => { node => $node_name },
                requested_fields  => [qw(auto-giveback)],
            );
            $reapply_auto_giveback_off = 1
              if $failover_info->auto_giveback() =~ /false/i;
        }

        ## HANLR changes need to go in here
        $Log->comment("Enable Storage failover");
        NACL::STask::StorageFailover->enable(
            command_interface => $node,
            node              => $node,
            %common_opts,
        );

        if ($reapply_auto_giveback_off) {
            NACL::C::StorageFailover->modify(
                command_interface => $node,
                node              => $node,
                'auto-giveback'   => "false",
            );
        }
    }

    # Reset the HA flags of both the node and its partner when requested.
    if ( $opts{reset_flags} ) {
        $Log->comment("Reset HA flags");
        $self->reset_HA_flags( node => "$node_name,$partner_name", );
    }

    # A partner left in 'partial_giveback' needs a giveback before going on.
    my $partner_info = NACL::CS::StorageFailover->fetch(
        command_interface => $node,
        filter            => { node => $node_name },
        requested_fields  => [qw(partner-state)],
    );
    if ( $partner_info->partner_state() =~ m[partial_giveback]i ) {
        NACL::STask::StorageFailover->giveback(
            command_interface => $node,
            node              => $node,
            partner           => $partner,
            'override-vetoes' => 'true',
        );
    }

    # Aggregates whose home is the partner node.
    my @partner_aggrs = NACL::CS::StorageAggregate->fetch(
        command_interface => $node,
        filter            => { 'home-name' => $partner_name },
        requested_fields  => [qw(aggregate ha-policy state)],
    );

    # Keep the names of the online SFO aggregates and of the online CFO
    # aggregate; aggregates in any other state are ignored.
    my @online_sfo_aggregates;
    for my $aggr_info (@partner_aggrs) {
        my $policy = $aggr_info->ha_policy();
        if ( $policy eq 'sfo' ) {
            push @online_sfo_aggregates, $aggr_info->aggregate()
              if $aggr_info->state eq 'online';
        }
        elsif ( $policy eq 'cfo' ) {
            $self->_partner_cfo_aggr( $aggr_info->aggregate() )
              if $aggr_info->state eq 'online';
        }
    }
    $self->_partner_online_sfo_aggrs(@online_sfo_aggregates);

    $Log->debug(
        sub {
"capturing Volume state information for all volumes in the partner aggregate list";
        }
    );

    # aggregate name => volume name => { vserver, state }
    my %volumes_aggr;
    for my $aggr_name (@online_sfo_aggregates) {
        my @volume_records = NACL::CS::Volume->fetch(
            command_interface => $node,
            filter            => { aggregate => $aggr_name },
            requested_fields  => [qw(volume vserver state)],
            is_system_vol     => 0,
            allow_empty       => 1
        );
        for my $record (@volume_records) {
            my $volume_name = $record->volume();
            my $vserver     = $record->vserver();
            my $state       = $record->state();
            $volumes_aggr{$aggr_name}{$volume_name} = {
                'vserver' => $vserver,
                'state'   => $state,
            };
        }
    }
    $self->_partner_sfo_aggr_volumes( \%volumes_aggr );

    NACL::C::EventConfig->modify(
        command_interface => $node,
        "suppression"     => "off"
    );
    $Log->exit() if $may_exit;
} ## end sub init_executed

=head2 takeover

    $Node1->takeover();

(Instance method)
Initiate a takeover from Node1 and verify the takeover. FANTA takeover is
applicable on following cases :

1. If takeover type is takeover_command, 'bypass-optimization' is false and
   'option' is normal or allow-version-mismatch.

2. If the 'bypass-optimization' attribute is not defined, it checks the
   'bypass-takeover-optimization' field and, based on its value, sets the
   'bypass-optimization' attribute.

NATE::Exceptions::Argument will be thrown if 'bypass-optimization' attribute
is specified for RR and BR.

=over

=item Options

=over

=item C<< 'skip-lif-migration' => true|false >>

(Optional, defaults to UNDEF)
If this option is defined then the value is set before issuing takeover.
This is an option in "storage failover takeover" command.

=item C<< type => $scalar >>

( Optional, defaults to "takeover_command" )
Possible values are "takeover_command", "panic", "reboot", "halt",
"powercycle", "watchdog_reset".
Currently the following options of takeover are supported -  takeover_command,
panic, reboot, halt. The remaining options of takeover powercycle/watchdog_reset
will be implemented later.

=item C<< option => $scalar >>

( Optional, defaults to "normal" )
Possible values are "immediate", "force", "allow-version-mismatch",
"allow-disk-inventory-mismatch".

=item C<< polling_interval => $scalar >>

( Optional, defaults to 10secs )
This is the interval (in seconds) after which the status of takeover will
be polled.

=item C<< method_timeout => $method_timeout >>

(Optional) Time in seconds when takeover is expected to complete
Default : Value used in the STask StorageFailover

=item C<< takeover_expected => $boolean >>

( Optional, defaults to 1 )
Boolean value, 0 or 1. Option to indicate if takeover is expected to happen or
not. The behaviour of "nacltask_verify" depends on whether a takeover is
expected or not.

=item C<< nacltask_inhibit_takeover => $boolean >>

( Optional, defaults to 0 )
Option to indicate whether the takeover due to reboot/halt should be inhibited

=item C<< failure_reason => $scalar >>

( Optional )
The reason for which the takeover is expected to fail.
Possible values are,
disk_inventory_mismatch
version_mismatch
nvram_logs_unsynced
unable_to_access_partner_mailbox_disks
This option is not yet implemented.

=item C<< nacltask_verify_node_sfo_state => $boolean >>

(Optional, defaults to 1)
If 1, then verify the state of the local node using wait_for_state() STask

=item C<< nacltask_verify_partner_sfo_state => $boolean >>

(Optional, defaults to 0)
If 1, then verify the state of the partner node using wait_for_state() STask

=item C<< nacltask_verify_aggr_state_owner => $boolean >>

(Optional, defaults to 1)
If 1, then verify that all the aggregates are online after takeover and
also verify the ownerships of sfo/cfo aggrs.

In case if the method is not able to query the aggregate and check its state
after repeated polling for 60 seconds, due to the following error-
"Error: show failed: Failed to get the information for aggregate {aggr}.
Reason: ZSM - Can't connect to host",
then an exception of type C<<NACL::C::Exceptions::StorageAggregate::ZsmConnectionError>>
is thrown.

=item C<< nacltask_verify_ems => $boolean >>

(Optional, defaults to 1)
If 1, then verify that the expected takeover ems messages are generated and that
the takeover duration is within limits.

=item C<< nacltask_verify_asup => $boolean >>

(Optional, defaults to 0)
If 1, then verify that the appropriate asup's are generated.
Currently not supported.

=item C<< nacltask_verify_disk_ownership => $boolean >>

(Optional, defaults to 0)
If 1, then verify that the current owner for disks in sfo-styled aggrs is the
node.
Currently not supported.

=item C<< nacltask_powercycle_after_panic => $boolean >>

(Optional, defaults to 0)
If 1, then partner node will be powercycled when takeover_type ='panic' in STask.
When the value is 0, the routine ensures the partner reaches the firmware prompt
after panic in STask.

=back

=back

=cut

# Public entry point for a takeover.
# Hands the caller's options to _execute_with_gather_diag(), asking it to run
# takeover_executed() and naming HAGatherDiagException as the exception class.
# Returns whatever _execute_with_gather_diag() returns.
sub takeover {
    my $self          = shift;
    my %takeover_args = @_;

    return $self->_execute_with_gather_diag(
        method      => 'takeover_executed',
        exception   => 'NACL::MTask::Exceptions::HAGatherDiagException',
        method_args => \%takeover_args,
    );
}

# Worker behind takeover(): validates the options, issues the takeover through
# NACL::STask::StorageFailover and runs the requested verifications.
#
# Two paths exist:
#   * non-FANTA - taken when 'bypass-optimization' is 'true', the type is not
#     'takeover_command', the option is not 'normal', or the takeover is not
#     expected to happen;
#   * FANTA     - taken otherwise; aggregate relocation is verified through
#     NACL::MTask::StorageAggregateRelocation before the failover state check.
#
# Options: see the validation spec below; unknown options are tolerated
# (allow_extra). A caller-supplied 'bypass-optimization' is honoured; it
# defaults to 'false' when not given.
#
# Verification failures (aggregate state/owner, EMS) are collected and reported
# together in a single NACL::Exceptions::OperationFailed at the end. Exceptions
# raised by the STask calls themselves are not caught here.
sub takeover_executed {
    $Log->enter() if $may_enter;
    my $self = shift;

    my %opts = validate_with(
        params => \@_,
        spec   => {
            type   => { type => SCALAR, default => "takeover_command" },
            option => { type => SCALAR, default => "normal" },
            'skip-lif-migration'=> { type => SCALAR, optional => 1 },
            'bypass-optimization' => { type => SCALAR, optional => 1 },
            polling_interval  => { type => SCALAR, default  => POLL_INTERVAL },
            method_timeout    => { type => SCALAR, optional => 1 },
            takeover_expected => { type => SCALAR, default  => 1 },
            failure_reason    => { type => SCALAR, optional => 1 },
            nacltask_verify_node_sfo_state => { type => BOOLEAN, default => 1 },
            nacltask_verify_partner_sfo_state =>
              { type => BOOLEAN, default => 0 },
            nacltask_verify_aggr_state_owner =>
              { type => BOOLEAN, default => 1 },
            nacltask_verify_ems       => { type => BOOLEAN, default => 1 },
            nacltask_inhibit_takeover => { type => BOOLEAN, default => 0 },
            nacltask_powercycle_after_panic =>
              { type => BOOLEAN, default => 0 },
            nacltask_boot_after_halt => { type => BOOLEAN, default => 0 },
            nacltask_log_retries     => { type => SCALAR,  default => 3 },
        },
        allow_extra => 1,
    );

    my %common_opts;
    $self->_move_common_component_params(
        source => \%opts,
        target => \%common_opts
    );

    my $node      = $self->node();
    my $node_name = $node->node();
    my $partner =
      NACL::STask::Node->get_partner_obj( command_interface => $node );
    my $partner_name         = $partner->node();
    ## Temporary fix to bypass the ems verification ( Fix will be back ported after the burt749767  is fixed )
    my $ems_check            = delete $opts{nacltask_verify_ems};
    $ems_check = !$::BYPASS_EMS if ( defined $::BYPASS_EMS && $ems_check );
    my $aggr_check           = delete $opts{nacltask_verify_aggr_state_owner};
    my $nacltask_log_retries = delete $opts{nacltask_log_retries};
    my @online_sfo_aggregates = $self->_partner_online_sfo_aggrs();
    my @partner_aggr_names =
      ( @online_sfo_aggregates, $self->_partner_cfo_aggr() );
    my $is_fanta_takeover;
    my $takeover_type = $opts{type};

    # Takeover types for which the additional hw-assist related EMS events
    # are expected.
    my %is_hwassist_type = map { $_ => 1 }
      qw(system_powercycle system_power_off system_reset watchdog_reset);

    # store any errors we have encountered
    my @error;

    # Constructing the hash to send to STask takeover
    my %sfo_opts;
    $sfo_opts{node}                      = $node;
    $sfo_opts{partner}                   = $partner;
    $sfo_opts{takeovertype}              = $opts{type};
    $sfo_opts{option}                    = $opts{option};
    $sfo_opts{nacltask_poll_interval}    = $opts{polling_interval};
    $sfo_opts{nacltask_inhibit_takeover} = $opts{nacltask_inhibit_takeover};
    $sfo_opts{nacltask_verify} = $opts{nacltask_verify_node_sfo_state};
    $sfo_opts{nacltask_check_partner_state} =
      $opts{nacltask_verify_partner_sfo_state};
    $sfo_opts{nacltask_powercycle_after_panic} =
      $opts{nacltask_powercycle_after_panic};
    $sfo_opts{nacltask_expect_takeover} = $opts{takeover_expected};
    $sfo_opts{nacltask_boot_after_halt} = $opts{nacltask_boot_after_halt};

    if ( defined $opts{method_timeout} ) {
        $sfo_opts{'method-timeout'} = $opts{method_timeout};
    }

    if ( defined $opts{'skip-lif-migration'} ) {
        $sfo_opts{'skip-lif-migration'} = $opts{'skip-lif-migration'};
    }

    if ( defined $opts{'bypass-optimization'} ) {
        $sfo_opts{'bypass-optimization'} = $opts{'bypass-optimization'};
    }

    # No EMS/aggregate verification when the takeover is inhibited or is not
    # expected to happen.
    if ( $opts{nacltask_inhibit_takeover} or !( $opts{takeover_expected} ) ) {
        $ems_check  = 0;
        $aggr_check = 0;
    }

    # EMS events that must all be present after the takeover
    my @check_for_all_presence = (
        'cf.fm.takeoverStarted', 'cf.fm.takeoverComplete',
        'cf.fm.takeoverDuration',
    );
    if ( $is_hwassist_type{$takeover_type} ) {
        push( @check_for_all_presence,
            'cf.fsm.stateTransit', 'cf.hwassist.takeoverTrapRecv' );
    }

    # Detector on the EMS log file; used as a fallback when the event log
    # verification fails.
    my $system_log_detector = NACL::MTask::SystemLogDetector->new(
        command_interface           => $node,
        set_log_file                => ['/mroot/etc/log/ems'],
        nacltask_skip_nonascii_file => 0,
    );
    $system_log_detector->start();

    # The 'bypass-takeover-optimization' field is not supported in HN.0
    # (BURT 1023506), so it is no longer fetched to derive a default. Default
    # to 'false', but keep a value supplied by the caller: overwriting it
    # unconditionally made the 'true' test below unreachable and silently
    # ignored the documented option.
    unless ( defined $sfo_opts{'bypass-optimization'} ) {
        $sfo_opts{'bypass-optimization'} = 'false';
    }

    my $detector = NACL::MTask::EventLogDetector->new(
        command_interface => $node,
    );
    my $proc;

    # Marks the start time for the event log fetch and starts the background
    # process that collects the event log.
    my $start_ems_collector = sub {
        $detector->start();
        my %args = ( 'detector' => $detector );
        $proc = NATE::Process->new(
            codespec => sub { $self->_event_collector(@_); },
            args     => [%args],
            runid    => 'TO_event_logs',
        );
        $proc->start;
    };

    # Non FANTA takeover when any one of these holds: bypass-optimization is
    # 'true', the type is not 'takeover_command', the option is not 'normal',
    # or the takeover is not expected. Otherwise it is a FANTA takeover.
    if (  $sfo_opts{'bypass-optimization'} eq 'true'
        || $sfo_opts{takeovertype} ne 'takeover_command'
        || $sfo_opts{option}       ne 'normal'
        || !$opts{takeover_expected} )
    {

        # Start capturing ems logs
        $start_ems_collector->() if $ems_check;

        # Calling the STask takeover to initiate a takeover
        NACL::STask::StorageFailover->takeover(
            command_interface => $node,
            %sfo_opts,
            %common_opts,
        );

        # Validate aggregate state and ownership after the takeover
        if ($aggr_check) {
            try {
                $self->_verify_aggr_state_owner(
                    aggr_names => \@partner_aggr_names,
                    home_name  => $partner_name,
                    owner_name => $node_name,
                );
            }
            catch NACL::Exceptions::UnexpectedState with {
                my $exception_object = shift;
                push( @error, $exception_object->text() );
            };
        }
    }
    else {

        # FANTA Takeover
        my $arl_task_obj = NACL::MTask::StorageAggregateRelocation->new(
            command_interface => $node,
            node              => $partner->node(),
            destination       => $node->node(),
            aggregate_list    => \@online_sfo_aggregates
        );

        # Create aggr-vol list for arl verification
        $arl_task_obj->create_aggr_vol_list(
            'aggregate-list' => \@online_sfo_aggregates );

        my $expected_takeover_state;

        # This flag is used later for the fanta ems verification.
        $is_fanta_takeover = 1;

        # The expected node state depends on the auto-giveback setting.
        my $expected_partner_state = "wfg";
        my $state                  = NACL::CS::StorageFailover->fetch(
            command_interface => $node,
            filter            => { node => $node },
            requested_fields  => [qw(auto-giveback)]
        );
        my $auto_giveback = $state->auto_giveback();
        if ( $auto_giveback eq 'false' ) {
            $expected_takeover_state = "takeover";
        }
        else {
            $expected_takeover_state = "takeover_autogiveback_scheduled";
        }

        # Start capturing ems logs
        $start_ems_collector->() if $ems_check;

        # This detector object will be used to verify the aggregate
        # temporary relocation message 'sfo.aggr.relocated.temp'
        $arl_task_obj->create_event_log_detector();
        $self->arl_mtask_obj($arl_task_obj);

        # Calling the STask takeover to initiate a takeover.
        # Skipping failover state check, need to verify sfo
        # aggr relocation time.
        NACL::STask::StorageFailover->takeover(
            command_interface => $node,
            %common_opts,
            %sfo_opts,
            nacltask_verify              => 0,
            nacltask_check_partner_state => 0,
        );

        # ARL verification is mandatory for a FANTA takeover, so the
        # nacltask_verify_aggr_state_owner value is ignored here.
        # With online SFO aggregates the relocation is verified with a timeout
        # of (No. of online aggrs) * SFO_AGGR_ONLINE_TIMEOUT; without any, the
        # state check below is given TAKEOVER_TIMEOUT as its method-timeout.
        my %method_timeout;
        if (@online_sfo_aggregates) {
            my $arl_timeout =
              scalar @online_sfo_aggregates * SFO_AGGR_ONLINE_TIMEOUT;
            $arl_task_obj->perform_arl_verifications(
                'method-timeout'    => $arl_timeout,
                nacltask_arl_type   => 'TakeOver',
                nacltask_verify_ems => 0,
            );
        }
        else {
            %method_timeout = ( 'method-timeout' => TAKEOVER_TIMEOUT );
        }

        # Now check failover state. Expected state is 'In takeover'.
        NACL::STask::StorageFailover->wait_for_state(
            command_interface => $node,
            node              => $node,
            partner           => $partner,
            node_state        => $expected_takeover_state,
            %common_opts,
            %method_timeout,
        );
        $Log->comment('Takeover state is verified');

        # If nacltask_check_partner_state is specified, wait for wfg state.
        if ( $sfo_opts{nacltask_check_partner_state} ) {
            NACL::STask::StorageFailover->wait_for_state(
                command_interface => $node,
                node              => $node,
                partner           => $partner,
                node_state        => $expected_takeover_state,
                partner_state     => $expected_partner_state,
                %common_opts,
            );
        }
    }

    # Check nacltask_verify_ems
    if ($ems_check) {

        # Declare first, assign conditionally: "my $x = ... if COND" has
        # undefined behaviour in Perl. When nothing could be collected the
        # list is empty, so every expected event is reported as missing
        # instead of dying on an undefined array reference.
        my $ems;
        if ( $proc->is_running() ) {
            $ems = $self->_get_result_and_stop(
                till_time => 120,
                proc      => $proc
            );
        }
        $ems ||= [];

        ## Check for the presence of ems
        try {
            $Log->comment("Check for the presence of all non FANTA ems");
            my %event_seen = map { $_->{messagename} => 1 } @{$ems};
            my @missed_ems =
              grep { !defined $event_seen{$_} } @check_for_all_presence;
            if (@missed_ems) {

                # No $Log->exit() here: this exception is caught a few lines
                # below and the method carries on to its regular exit.
                NACL::Exceptions::EventCheckFailure->throw(
                    'EMS Not Found : ' . Dumper( \@missed_ems ) );
            }

            $Log->comment("Print the event messages");
            foreach my $event (@check_for_all_presence) {
                my %print_args = (
                    event_array => $ems,
                    event       => $event,
                );

                # The hw-assist related events are printed together with the
                # takeover type. The name must match the one pushed onto
                # @check_for_all_presence ('...takeoverTrapRecv').
                if (
                    $is_hwassist_type{$takeover_type}
                    && (   $event eq 'cf.fsm.stateTransit'
                        || $event eq 'cf.hwassist.takeoverTrapRecv' )
                  )
                {
                    $print_args{takeover_type} = $takeover_type;
                }
                $self->_print_ems_messages(%print_args);
            }

            $Log->comment("Verify the takeover duration");
            $self->_verify_takeover_giveback_duration(
                event_array => $ems );

            # Verify fanta ems logs
            if ($is_fanta_takeover) {
                $self->_verify_fanta_ems(
                    event_array    => $ems,
                    sfo_aggregates => \@online_sfo_aggregates,
                );
            }
        }
        catch NACL::Exceptions::EventCheckFailure with {
            my $e = shift;
            $Log->warn( 'Event log verification failed. ' . $e->text() );

            # Fall back to the EMS log file detector, then record the failure.
            $self->_check_ems_log(
                detector             => $system_log_detector,
                sfo_aggrs            => \@online_sfo_aggregates,
                takeover_type        => $takeover_type,
                nacltask_is_fanta    => $is_fanta_takeover,
                nacltask_log_retries => $nacltask_log_retries
            );
            push( @error, $e->text() );
        };
    }

    # Report all collected verification errors at once
    if (@error) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::OperationFailed->throw(
            "Takeover failed: " . join( "\n", @error ) );
    }

    $Log->exit() if $may_exit;
} ## end sub takeover_executed

=head2 giveback

    $Node1->giveback();

(Instance method)
Initiate a giveback from Node1 and verify the giveback.

=over

=item Options

=over

=item C<< giveback_expected  => $boolean >>

( Optional, defaults to 1 )
If 1, then giveback is expected to complete successfully.
If 0, then giveback is expected to fail due to any of the reasons mentioned in
"failure_reason"
The behaviour of "nacltask_verify" depends on whether a giveback is
expected or not.

=item C<< 'require-partner-waiting'  => true | false >>

( Optional, defaults to UNDEF )
If this option is defined then the partner state is not checked before issuing
giveback.
This is an option in "storage failover giveback" command.

=item C<< 'override-vetoes'  => true | false >>

( Optional, defaults to UNDEF )
If this option is defined then the giveback will be done overriding the vetoes.
This is an option in "storage failover giveback" command.

=item C<< 'only-cfo-aggregates'  => true | false >>

( Optional, defaults to UNDEF )
If this option is defined then only the cfo styled aggregates will be given
back and the node will enter a partial giveback state.
This is an option in "storage failover giveback" command

=item C<< auto_giveback_expected => 0 >>

(Optional) user input flag that is used to decide whether an auto giveback
is expected or not. Example, if takeover happens due to partner reboot then
even if the autogiveback flag is disabled, a autogiveback is expected to
happen
Default : 0

=item C<< polling_interval => $scalar >>

( Optional, defaults to 10secs )
This is the interval (in seconds) after which the status of giveback will
be polled.

=item C<< method_timeout => $method_timeout >>

(Optional) Time in seconds when giveback is expected to complete
Default : Value used in the STask StorageFailover

=item C<< failure_reason => $scalar >>

( Optional )
The reason for which the giveback is expected to fail.
Possible values are
unable_to_read_partner_state
disk_inventory_information_not_yet_received
autosupport_vetoed
failed_due_to_diskcheck

=item C<< nacltask_verify_sfo_state => $boolean >>

(Optional, defaults to 1)
Defaults to 0 (do not perform verification).
If 1, then giveback verification methods will be invoked to verify the
giveback scenario.

=item C<< nacltask_verify_aggr_state_owner => $boolean >>

(Optional, defaults to 1)
If 1, then verify that all the aggregates are online after giveback

If the method is not able to query the aggregate and check its state
after repeated polling for 60 seconds, due to the following error-
"Error: show failed: Failed to get the information for aggregate {aggr}.
Reason: ZSM - Can't connect to host",
then an exception of type C<< NACL::C::Exceptions::StorageAggregate::ZsmConnectionError >>
is thrown.

=item C<< nacltask_verify_ems => $boolean >>

(Optional, defaults to 1)
If 1, then verify that the giveback ems messages (cf.fm.givebackStarted,
cf.fm.givebackComplete, cf.fm.givebackDuration) are generated and giveback
duration is within limits.

=item C<< nacltask_wait_for_asup_generation => 0 >>

(Optional, defaults to 0)
User input flag that is used to wait if giveback is vetoed
due to autosupport.

=item C<< nacltask_wait_for_partner => 1 >>

(Optional) user input flag that is used to wait for partner to come to wfg
state before issuing a giveback
Default : 1

=item C<< nacltask_verify_asup => $boolean >>

(Optional, defaults to 0)
If 1, then verify that the appropriate asup's are generated.
Currently not supported.

=item C<< nacltask_verify_disk_ownership => $boolean >>

(Optional, defaults to 0)
If 1, then verify that the current owner for disks in sfo-styled aggrs is the
node

=item C<< nacltask_verify_arl_ems => $boolean >>

(Optional, defaults to 0)
If 1, then verify aggregate temporary relocation ems.

=back

=back

=cut

sub giveback {
    my $self = shift;
    my %args = @_;

    # Public entry point: all the real work happens in giveback_executed(),
    # which is invoked through _execute_with_gather_diag() so that failures
    # are reported as NACL::MTask::Exceptions::HAGatherDiagException.
    # NOTE(review): the wrapper presumably gathers diagnostics on failure -
    # confirm against _execute_with_gather_diag().
    return $self->_execute_with_gather_diag(
        exception   => 'NACL::MTask::Exceptions::HAGatherDiagException',
        method      => 'giveback_executed',
        method_args => \%args,
    );
}

# Worker behind giveback().
#
# Validates the giveback options, issues the giveback through
# NACL::STask::StorageFailover->giveback() and then runs the requested
# verifications (aggregate state/ownership, giveback EMS messages and
# duration, ARL EMS messages).
#
# Failures of the aggregate and EMS verifications are collected and reported
# together in one NACL::Exceptions::OperationFailed exception. Exceptions
# raised by the STask giveback itself are not caught here.
sub giveback_executed {
    $Log->enter() if $may_enter;
    my $self = shift;

    my %opts = validate_with(
        params => \@_,
        spec   => {
            giveback_expected      => { type => BOOLEAN, default => 1 },
            auto_giveback_expected => { type => BOOLEAN, default => 0 },
            'require-partner-waiting' =>
              { regex => qr/^(true|false)$/, optional => 1 },
            'override-vetoes' => { regex => qr/^(true|false)$/, optional => 1 },
            'only-cfo-aggregates' =>
              { regex => qr/^(true|false)$/, optional => 1 },
            polling_interval => { type => SCALAR, default  => POLL_INTERVAL },
            failure_reason   => { type => SCALAR, optional => 1 },
            method_timeout   => { type => SCALAR, optional => 1 },
            nacltask_verify_sfo_state => { type => SCALAR, default => 1 },
            nacltask_verify_aggr_state_owner =>
              { type => BOOLEAN, default => 1 },
            nacltask_verify_ems => { type => BOOLEAN, default => 1 },
            nacltask_wait_for_asup_generation =>
              { type => BOOLEAN, default => 0 },
            nacltask_wait_for_partner   => { type => BOOLEAN, default => 1 },
            nacltask_verify_arl_ems     => { type => BOOLEAN, default => 0 },
            nacltask_skip_auto_giveback => { type => BOOLEAN, default => 0 },
        },
    );

    my %common_opts;
    $self->_move_common_component_params(
        source => \%opts,
        target => \%common_opts
    );
    $self->node()->refresh_command_interface();
    my $node      = $self->node();
    my $node_name = $node->node();
    my $partner =
      NACL::STask::Node->get_partner_obj( command_interface => $node );
    my $partner_name = $partner->node();

    ## Temporary fix to bypass the ems verification ( Fix will be back ported after the burt749767  is fixed )
    my $ems_check = delete $opts{nacltask_verify_ems};
    $ems_check = !$::BYPASS_EMS if ( defined $::BYPASS_EMS && $ems_check );
    my $arl_ems_check = delete $opts{nacltask_verify_arl_ems};
    my $aggr_check    = delete $opts{nacltask_verify_aggr_state_owner};
    my @partner_aggr_names =
      ( $self->_partner_online_sfo_aggrs, $self->_partner_cfo_aggr );
    my @online_sfo_aggregates = $self->_partner_online_sfo_aggrs();

    # Default giveback timeout: time for the partner to go from
    # waiting-for-giveback to SF up, plus a per-aggregate allowance for every
    # online SFO aggregate.
    my $timeout = WFG_TO_SF_UP_TIMEOUT;
    if (@online_sfo_aggregates) {
        $timeout += scalar(@online_sfo_aggregates) * SFO_AGGR_ONLINE_TIMEOUT;
    }

    # store any errors we have encountered
    my @error;

    # Constructing the hash to send to STask giveback
    my %sfo_opts;
    my @copy_opts = (
        'require-partner-waiting', 'override-vetoes',
        'only-cfo-aggregates',     'auto_giveback_expected',
        'nacltask_skip_auto_giveback'
    );
    my %opts_mapping = (
        nacltask_verify_sfo_state         => 'nacltask_verify',
        failure_reason                    => 'failure-reason',
        method_timeout                    => 'method-timeout',
        polling_interval                  => 'nacltask_poll_interval',
        nacltask_wait_for_asup_generation => 'wait-for-asup-generation',
        nacltask_wait_for_partner         => 'wait_for_partner',
    );

    $self->_hash_copy(
        source => \%opts,
        target => \%sfo_opts,
        map    => \%opts_mapping,
        copy   => \@copy_opts,
    );

    $sfo_opts{node}    = $node;
    $sfo_opts{partner} = $partner;

    if ( !defined $sfo_opts{'method-timeout'} ) {
        $sfo_opts{'method-timeout'} = $timeout;
    }

    # Set the verification details: when the giveback is expected to fail
    # there is nothing to verify afterwards.
    if ( !( $opts{giveback_expected} ) or ( defined $opts{failure_reason} ) ) {
        $ems_check  = 0;
        $aggr_check = 0;
    }

    # Construct and start the EventLogDetector object
    my @check_for_all_presence = (
        'cf.fm.givebackStarted', 'cf.fm.givebackComplete',
        'cf.fm.givebackDuration'
    );

    my $detector =
      NACL::MTask::EventLogDetector->new( command_interface => $node, );
    $detector->start();
    my $proc;
    if ($ems_check) {
        my %args = ( 'detector' => $detector );
        $proc = NATE::Process->new(
            codespec => sub { $self->_event_collector(@_); },
            args     => [%args],
            runid    => 'GB_event_logs',
        );

        #start collecting event log.
        $proc->start;
    }

    # Calling the STask giveback to initiate a giveback
    # 320 seconds timeout for node to boot from wfg to
    # SF_UP. See Also burt603319.
    NACL::STask::StorageFailover->giveback(
        command_interface => $node,
        %sfo_opts,
        %common_opts,
    );
    sleep(60);

    # Call verify methods to validate the different elements after giveback
    # Aggr check
    if ($aggr_check) {
        try {

            # 'only-cfo-aggregates' is optional, so guard against undef
            # before the string comparison.
            my $only_cfo = defined $opts{'only-cfo-aggregates'}
              && $opts{'only-cfo-aggregates'} eq "true";

            # if 'only-cfo-aggregates' is true then verify
            # that sfo aggregates are not given back (still owned by
            # this node); otherwise they must be owned by the partner.
            $self->_verify_aggr_state_owner(
                aggr_names => \@partner_aggr_names,
                home_name  => $partner_name,
                owner_name => ( $only_cfo ? $node_name : $partner_name ),
            );
        }
        catch NACL::Exceptions::UnexpectedState with {
            my $exception_object = shift;
            push( @error, $exception_object->text() );
        };
    }

    # Reads the node's current date and writes the same value back through
    # NACL::C::ClusterDate->modify in "<year><month><day><hour><min>.<sec>"
    # form.
    # NOTE(review): presumably done so the EMS timestamps line up before the
    # EMS checks below - confirm the intent.
    my $modify_system_date = sub {
        my $SystemNodeDate = NACL::CS::ClusterDate->fetch(
            command_interface => $node,
            filter            => { node => $node_name }
        );
        my $Date = $SystemNodeDate->date();
        $Log->debug("Date     : $Date");
        my @dateandtime = split /\"|\/| |\:/, $Date;

        # Two-digit month/day whether or not the CLI already zero-pads them
        # ("9" and "09" both become "09").
        my $month = sprintf( '%02d', $dateandtime[1] );
        my $day   = sprintf( '%02d', $dateandtime[2] );

        NACL::C::ClusterDate->modify(
            command_interface => $node,
            dateandtime =>
"$dateandtime[3]$month$day$dateandtime[4]$dateandtime[5].$dateandtime[6]",
        );
    };

    if ( $ems_check || $arl_ems_check ) {
        $modify_system_date->();
    }

    # EMS check
    if ($ems_check) {
        try {

            # Collect the events gathered by the background process. If the
            # process is no longer running there is nothing to collect and
            # every expected EMS is reported as missing below.
            my $ems =
                $proc->is_running()
              ? $self->_get_result_and_stop( proc => $proc )
              : [];
            $ems ||= [];

            my %seen = map { ( $_->{messagename}, 1 ) } @$ems;
            my @missed_ems =
              grep { !defined $seen{$_} } @check_for_all_presence;
            if (@missed_ems) {
                NACL::Exceptions::EventCheckFailure->throw(
                    'EMS Not Found : ' . Dumper( \@missed_ems ) );
            }

            foreach my $messagename (@check_for_all_presence) {
                $Log->debug("Checking EMS : $messagename");
                $self->_print_ems_messages(
                    event_array => $ems,
                    event       => $messagename,
                );
            }

            $Log->comment("Verify the giveback duration");
            $self->_verify_takeover_giveback_duration( event_array => $ems );
        }
        catch NACL::Exceptions::EventCheckFailure with {
            my $exception_object = shift;
            push( @error, $exception_object->text() );
        };
    }

    # Verify aggregate temporary relocation ems
    if ( $arl_ems_check && defined $self->arl_mtask_obj() ) {
        $self->arl_mtask_obj()->verify_fanta_to_ems_messages();
    }

    # Report all collected verification failures in one exception.
    if (@error) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::OperationFailed->throw(
            "Giveback failed: " . join( "\n", @error ) );
    }

    $Log->exit() if $may_exit;
} ## end sub giveback_executed

=head2 reset_HA_flags

    $HA1->reset_HA_flags(node => $node);

(Instance method)
Method to reset the following HA related flags to their default values
This method is called by default on creating a instance of this package
    'auto-giveback'
    'check-partner'
    'detection-time'
    'onfailure'
    'onpanic'
    'onreboot'
    'abort-operations'
    'delay-seconds'

=over

=item Options

=over

=item C<< node => $node >>

(Required, scalar)
The name of the node on which to reset the HA flags.

=back

=back

=cut
sub reset_HA_flags {
    my $self = shift;
    my %args = @_;

    # Public entry point: the flags are actually reset by
    # reset_HA_flags_executed(), invoked through _execute_with_gather_diag()
    # so that failures are reported as
    # NACL::MTask::Exceptions::HAGatherDiagException.
    return $self->_execute_with_gather_diag(
        exception   => 'NACL::MTask::Exceptions::HAGatherDiagException',
        method      => 'reset_HA_flags_executed',
        method_args => \%args,
    );
}

# Worker behind reset_HA_flags().
#
# Sets the storage failover options of the node named by 'node' through
# NACL::C::StorageFailover->modify: auto-giveback, onfailure, onpanic and
# onreboot to "true", delay-seconds to 600 and detection-time to a value that
# depends on whether "metrocluster show" reports an IP-fabric configuration.
#
# Throws NACL::APISet::Exceptions::ResponseException if the modify fails.
sub reset_HA_flags_executed {
    $Log->enter() if $may_enter;

    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => { node => { type => SCALAR }, }
    );

    my %common_opts;
    $self->_move_common_component_params(
        source => \%opts,
        target => \%common_opts
    );

    my $detection_time;
    $Log->comment("Verify setup is MCC_IP or not");
    my $node   = $self->node();
    my $mcc_ip = $node->apiset()->execute_raw_command(
        command => "metrocluster show -fields configuration-type" );

    # MetroCluster IP setups use a different failover detection time.
    if ( $mcc_ip =~ /IP-fabric/ ) {
        $detection_time = MCC_IP_DETECTION_TIME;
    }
    else {
        $detection_time = NON_MCC_IP_DETECTION_TIME;
    }

    try {
        NACL::C::StorageFailover->modify(
            command_interface => $node,
            node              => $opts{node},
            'auto-giveback'   => "true",
            'detection-time'  => $detection_time,
            'onfailure'       => "true",
            'onpanic'         => "true",
            'onreboot'        => "true",
            'delay-seconds'   => "600",
            'method-timeout'  => '180',
            %common_opts,
        );
    }
    catch NACL::APISet::Exceptions::ResponseException with {
        my $exception_object = shift;
        $Log->comment(
            "Failed to reset the HA flags, " . $exception_object->text() );

        # Balance the enter() above before leaving through the exception,
        # as the other methods in this file do before throwing.
        $Log->exit() if $may_exit;
        NACL::APISet::Exceptions::ResponseException->throw(
            "Failed to reset HA flags");
    };

    $Log->exit() if $may_exit;
} ## end sub reset_HA_flags_executed

## Method to verify aggregates' state is online and ownership of the aggregates
## Method to verify aggregates' state is online and ownership of the aggregates
##
## Options:
##   aggr_names => \@names  (required by validation; the aggregates actually
##                           checked come from _partner_online_sfo_aggrs()
##                           and _partner_cfo_aggr())
##   home_name  => $name    expected home node of the aggregates
##   owner_name => $name    expected current owner of the SFO aggregates
##
## For every online SFO aggregate of the partner this waits until
## state=online, owner-name=owner_name and home-name=home_name, and then
## checks the state of the volumes recorded for that aggregate. After that the
## partner's CFO aggregate is checked: it must be online and both owned by
## and homed on home_name.
##
## Throws NACL::Exceptions::UnexpectedState if any check fails.
sub _verify_aggr_state_owner {
    $Log->enter() if $may_enter;
    my ( $self, @args ) = @_;
    my %opts = validate_with(
        params => \@args,
        spec   => {
            aggr_names => { type => ARRAYREF },
            home_name  => { type => SCALAR },
            owner_name => { type => SCALAR },
        }
    );

    my $destination = $opts{owner_name};
    my $home_name   = $opts{home_name};
    my %common_opts;
    $self->_move_common_component_params(
        source => \%opts,
        target => \%common_opts
    );
    my $timeout = AGGR_ONLINE_TIMEOUT;
    my @unexpected_results;
    my @aggregate_list     = $self->_partner_online_sfo_aggrs();
    my $aggregate_vol_list = $self->_partner_sfo_aggr_volumes();
    my $node               = $self->node();
    $Log->comment(
        "This is the sfo aggregate_list " . Dumper( \@aggregate_list ) );

    my %aggr_vol = %$aggregate_vol_list;

    # gcov builds get six times the normal timeout. This is worked out once,
    # before the loop: scaling inside the loop would multiply the timeout
    # again for every aggregate (6x, 36x, 216x, ...).
    if (@aggregate_list) {
        my $ver_mgr = $node->get_version_manager();

        # Type of build Eg: x86_64.debug.gcov
        my $build_type =
          $ver_mgr->get_version_attribute( attribute => "build_options" );

        $Log->comment("build_type is : $build_type");
        $Log->comment("timeout is : $timeout");
        if ( $build_type =~ /gcov/i ) {
            $timeout = 6 * $timeout;
            $Log->comment("timeout is setting for gcov: $timeout");
        }
        else {
            $Log->comment("timeout is setting for non-gcov: $timeout");
        }
    }

    foreach my $aggregate (@aggregate_list) {
        $Log->comment("This is the aggregate $aggregate ");
        try {
            $Log->comment( "Verify that state=online, owner-name=$destination "
                  . "home-name=$home_name, for aggr $aggregate" );

            # Retry the wait when the aggregate cannot be queried because of
            # a ZSM connection error.
            nacl_method_retry(
                code => sub {
                    NACL::C::StorageAggregate->wait_on_attributes(
                        aggregate         => $aggregate,
                        command_interface => $node,
                        attributes        => [
                            {   attribute_to_check => 'state',
                                till_value         => 'online',
                            },
                            {   attribute_to_check => 'owner-name',
                                till_value         => $destination,
                            },
                            {   attribute_to_check => 'home-name',
                                till_value         => $home_name,
                            }
                        ],
                        'method-timeout' => $timeout
                    );
                },
                tries_count => 6,
                sleep_time  => 10,
                exceptions =>
                    'NACL::C::Exceptions::StorageAggregate::ZsmConnectionError'
            );

            if ( defined $aggr_vol{$aggregate} ) {
                my %vol_details = %{ $aggr_vol{$aggregate} };

                while ( my ( $key, $value ) = each %vol_details ) {
                    $Log->comment( "Verifying that state="
                          . $value->{state}
                          . " for volume $key in aggr $aggregate" );
                    $self->_check_volume_state(
                        volume           => $key,
                        vserver          => $value->{'vserver'},
                        aggregate        => $aggregate,
                        state            => $value->{'state'},
                        'method-timeout' => $timeout,
                        polling_interval => POLL_INTERVAL,
                    );
                }
            }
        }
        catch NACL::Exceptions::Timeout with {
            my $exception = shift;
            $Log->debug(
                sub {
                    "Exception text received in _verify_aggr_state_owner "
                      . $exception->text();
                }
            );
            my $error_message =
                "After waiting for $timeout seconds the relocated "
              . "Aggregate attributes\nowner_name,home_name,state,"
              . "volumes' state do not have expected values for "
              . "aggregate $aggregate";

            $Log->debug( sub { $error_message } );

            # Remember the failure and carry on with the next aggregate so
            # that all failures are reported together.
            push @unexpected_results,
              { aggregate => $aggregate, text => $exception->text() };
        };
    }    ## foreach ends here
    if ( scalar @unexpected_results ) {
        my $text = join "\n",
          map { "$_->{aggregate} : $_->{text}" } @unexpected_results;
        $Log->exit() if $may_exit;
        NACL::Exceptions::UnexpectedState->throw(
"List of failures in the Aggregate verification are as follows\n$text\n",
            unexpected_results => \@unexpected_results
        );
    }
    ### Verify CFO aggr ownership and state.
    my $cfo_aggr = $self->_partner_cfo_aggr();
    my ( $state, $home, $owner );
    my $end_time = time() + CFO_AGGR_ONLINE_TIMEOUT;

    # Poll until the aggregate reports a real home and owner ("-" means the
    # value is not available yet) or the timeout expires.
    while ( time() < $end_time ) {
        $Log->comment("Inside the loop to check the aggr state");
        my $aggr_obj = NACL::CS::StorageAggregate->fetch(
            command_interface => $self->node(),
            filter            => { aggregate => $cfo_aggr },
            requested_fields  => [qw(aggregate state home-name owner-name)],
            %common_opts,
        );
        $home  = $aggr_obj->home_name();
        $owner = $aggr_obj->owner_name();
        if ( ( $home eq "-" ) or ( $owner eq "-" ) ) {
            sleep(10);
        }
        else {
            $state = $aggr_obj->state();
            last;
        }
    }
    ## verify ownership of cfo aggr: the CFO aggregate is expected to be both
    ## homed on and owned by home_name, so that is the value reported as
    ## expected for the owner as well.
    if (   ( $owner ne $opts{home_name} )
        or ( $home ne $opts{home_name} ) )
    {
        $Log->exit() if $may_exit;
        NACL::Exceptions::UnexpectedState->throw(
                "Owner of aggr $cfo_aggr not as expected, "
              . "Owner: Actual - $owner, Expected - $opts{home_name}, "
              . "Home: Actual - $home, Expected - $opts{home_name}, " );
    }
    ## verify whether aggr is online
    if ( $state ne "online" ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::UnexpectedState->throw(
            "Aggr $cfo_aggr is not online");
    }
    $Log->exit() if $may_exit;
} ## end sub _verify_aggr_state_owner

## Helper method to print the 'cf' ems messages in EventLogDetector object
## Helper method to print the 'cf' ems messages in EventLogDetector object
##
## Options:
##   event_array   => \@events  array of hashes with at least the keys
##                              'messagename' and 'event'
##   event         => $name     message name to look for
##   takeover_type => $type     (optional) takeover type used to validate a
##                              cf.hwassist.takeoverTrapRecv event
##
## Logs the first entry of event_array whose messagename equals 'event'. If
## that entry is a hwassist takeover trap, the trap text must match the trap
## expected for takeover_type. When takeover_type is not supplied, any of the
## known traps is accepted.
##
## Throws NACL::Exceptions::EventCheckFailure if the trap is not the expected
## one.
sub _print_ems_messages {
    $Log->enter() if $may_enter;
    my $self = shift;
    my %opts = validate_with(
        params => \@_,
        spec   => {
            event_array   => { type => ARRAYREF },
            event         => { type => SCALAR },
            takeover_type => { type => SCALAR, optional => 1 },
        }
    );

    my %common_opts;
    $self->_move_common_component_params(
        source => \%opts,
        target => \%common_opts
    );
    my $event_array   = $opts{event_array};
    my $event         = $opts{event};
    my $takeover_type = $opts{takeover_type};

    my $hwassist_ems = "cf.hwassist.takeoverTrapRecv";

    # Trap text expected for each takeover type, checked in this order.
    my @trap_checks = (
        {   type    => 'system_powercycle',
            pattern => qr/power_cycle_via_/,
            note    => 'power_cycle_via_sp/rlm',
        },
        {   type    => 'system_power_off',
            pattern => qr/power_off_via_/,
            note    => 'power_off_via_sp/rlm',
        },
        {   type    => 'system_reset',
            pattern => qr/reset_/,
            note    => 'reset_via_sp/rlm',
        },
        {   type    => 'watchdog_reset',
            pattern => qr/watchdog_reset/,
            note    => 'watchdog_reset',
        },
    );

    foreach my $hash ( @{$event_array} ) {
        next if ( $hash->{messagename} ne $event );

        my $ems = $hash->{event};
        $Log->comment("EMS : $ems is present");

        #Checking if the ems is hwassist ems, and printing the messages
        if ( $ems =~ /$hwassist_ems/ ) {

            # The trap must match the text for the takeover type, compared
            # with 'eq'. Without a takeover type, any known trap text is
            # accepted.
            my ($matched) = grep {
                $ems =~ $_->{pattern}
                  and ( !defined $takeover_type
                    or $takeover_type eq $_->{type} )
            } @trap_checks;

            if ($matched) {
                $Log->comment(
                    "Received trap message: $ems with $matched->{note}");
            }
            else {
                $Log->exit() if $may_exit;
                NACL::Exceptions::EventCheckFailure->throw(
                    "EMS $ems is not having the correct trap event");
            }
        }

        # Only the first matching event is reported.
        last;
    }
    $Log->exit() if $may_exit;
} ## end sub _print_ems_messages

## Helper method to get the takeover/giveback duration from the ems msgs
sub _verify_takeover_giveback_duration {
    $Log->enter() if $may_enter;
    my ( $self, @params ) = @_;

    my %opts = validate_with(
        params => \@params,
        spec   => { event_array => { type => ARRAYREF } },
    );

    # Only events whose text mentions "Duration" carry a takeover/giveback
    # duration.
    my @duration_events =
      grep { $_->{event} =~ /Duration/ } @{ $opts{event_array} };

    # Without any duration event there is nothing to verify: report it.
    unless (@duration_events) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::EventCheckFailure->throw(
            "takeover/giveback duration ems not found");
    }

    for my $record (@duration_events) {

        # The event text is split on whitespace; field 3 is reported as the
        # operation and field 7 is taken as the duration in seconds.
        my @fields = split( /\s+/, $record->{event} );
        my ( $operation, $elapsed ) = @fields[ 3, 7 ];

        if ( $elapsed > MAX_TO_GB_DURATION ) {
            $Log->warn(
" $operation duration is greater than 60 seconds : Actual Duration = $elapsed"
            );
        }
        else {
            $Log->comment("$operation has completed in $elapsed seconds");
        }
    }

    $Log->exit() if $may_exit;
} ## end sub _verify_takeover_giveback_duration

## Helper method to verify the takeover ems messages.
## Prints every message of interest found in event_array and then checks that
## each message expected for this setup (with or without SFO aggregates) is
## present. Throws NACL::Exceptions::EventCheckFailure listing the messages
## that were not found.
sub _verify_fanta_ems {
    $Log->enter() if $may_enter;
    my ( $self, @params ) = @_;
    my %opts = validate_with(
        params => \@params,
        spec   => {
            event_array    => { type => ARRAYREF },
            sfo_aggregates => { type => ARRAYREF },
        }
    );

    my $modelinfo = $self->node->get_version_manager()
      ->get_version_attribute( attribute => 'modelinfo' );

    # Messages expected with / without SFO aggregates. Everything except a
    # SIMBOX model additionally reports cf.transition.summary.
    my @sfo_msgs = (
        'ha.takeover.stateChng', 'sfo.takeover.sfoStart',
        'sfo.takeover.relocDone'
    );
    my @non_sfo_msgs = ('sfo.takeover.bypassed');
    if ( $modelinfo !~ m[SIMBOX]i ) {
        push @sfo_msgs,     'cf.transition.summary';
        push @non_sfo_msgs, 'cf.transition.summary';
    }

    my $logged_events = $opts{event_array};
    for my $name ( @sfo_msgs, @non_sfo_msgs ) {
        $self->_print_ems_messages(
            event_array => $logged_events,
            event       => $name
        );
    }

    # Pick the list that applies and find the messages that match none of
    # the logged event texts.
    my $expected = @{ $opts{sfo_aggregates} } ? \@sfo_msgs : \@non_sfo_msgs;
    my @absent = grep {
        my $wanted = $_;
        my $hits = grep { $_->{event} =~ m[$wanted] } @{$logged_events};
        $hits == 0;
    } @{$expected};

    if (@absent) {
        my $report = join '', map { $_ . " ems not found\n" } @absent;
        $Log->exit() if $may_exit;
        NACL::Exceptions::EventCheckFailure->throw($report);
    }

    $Log->exit() if $may_exit;
} ## end sub _verify_fanta_ems

## Helper method to wait until a volume reaches the expected state.
##
## Options:
##   aggregate        => $name  aggregate hosting the volume
##   volume           => $name  volume to check
##   vserver          => $name  vserver owning the volume
##   state            => $state expected state (defaults to 'online')
##   'method-timeout' => $secs  overall time allowed
##   polling_interval => $secs  polling interval used while waiting on the
##                              volume state
##
## The volume is fetched and, if it is not already in the expected state,
## waited on. If the volume cannot be found yet the fetch is retried after
## POLL_DELTA until 'method-timeout' has elapsed.
##
## Throws NACL::Exceptions::Timeout if the volume could not be verified in
## time.
sub _check_volume_state {
    $Log->enter() if $may_enter;
    my $self = shift;

    # validate the parameters
    my %opts = validate_with(
        params => \@_,
        spec   => {
            aggregate        => { type => SCALAR },
            volume           => { type => SCALAR },
            vserver          => { type => SCALAR },
            state            => { type => SCALAR, default => 'online' },
            'method-timeout' => { type => SCALAR },
            polling_interval => { type => SCALAR },
        },
    );

    my $volume           = delete $opts{volume};
    my $vserver          = delete $opts{vserver};
    my $aggregate        = delete $opts{aggregate};
    my $state            = delete $opts{state};
    my $timeout          = delete $opts{'method-timeout'};
    my $polling_interval = delete $opts{polling_interval};

    # query NACL::CS::Volume to see status.
    my $pending  = 1;
    my $end_time = time() + $timeout;
    while ($pending) {
        try {
            my $vol_cs_obj = NACL::CS::Volume->fetch(
                command_interface => $self->node(),
                requested_fields  => ['state'],
                is_system_vol     => 0,
                filter            => {
                    volume    => $volume,
                    vserver   => $vserver,
                    aggregate => $aggregate
                }
            );
            if ( $vol_cs_obj->state ne $state ) {
                my $vol_obj = $vol_cs_obj->get_component_instance();
                $vol_obj->wait_on_attribute(
                    'method-timeout'   => $timeout,
                    attribute_to_check => "state",
                    till_value         => [$state],
                    polling_interval   => $polling_interval
                );
            }
            $pending = 0;
        } ## end try
        catch NACL::Exceptions::NoElementsFound with {

            # The volume is not visible on the aggregate yet:
            # wait a little and try the fetch again.
            Tharn::snooze(POLL_DELTA);
        };

        # Time out only while the volume is still unverified; a check that
        # succeeded late must not be reported as a timeout.
        if ( $pending and time() > $end_time ) {
            $Log->exit() if $may_exit;
            NACL::Exceptions::Timeout->throw(
                "After waiting for $timeout seconds 'state' of volume $volume "
                  . " in aggregate $aggregate is not expected $state " );
        }
    } ## end while ($pending)
    $Log->exit() if $may_exit;
} ## end sub _check_volume_state

## Helper method to check that the expected takeover ems messages are present
## in the logs returned by a NACL::MTask::SystemLogDetector.
##
## Options:
##   detector               => $obj   NACL::MTask::SystemLogDetector
##   sfo_aggrs              => \@aggr SFO aggregates (selects which fanta
##                                    messages are expected)
##   takeover_type          => $type  type of takeover that was performed
##   nacltask_log_retries   => $n     (optional, default 3) number of attempts
##   nacltask_poll_interval => $secs  (optional, default 10) pause between
##                                    attempts
##   nacltask_is_fanta      => $bool  also expect the fanta takeover messages
##
## Throws NACL::Exceptions::EventCheckFailure if the messages are still not
## all present after the last attempt.
sub _check_ems_log {
    $Log->enter() if $may_enter;
    my ( $self, @args ) = @_;
    my %opts = validate_with(
        params => \@args,
        spec   => {
            detector =>
              { type => OBJECT, isa => 'NACL::MTask::SystemLogDetector' },
            sfo_aggrs              => { type => ARRAYREF },
            takeover_type          => { type => SCALAR },
            nacltask_log_retries   => { type => SCALAR, optional => 1 },
            nacltask_poll_interval => { type => SCALAR, optional => 1 },
            nacltask_is_fanta      => { type => BOOLEAN },
        },
        allow_extra => 1
    );

    # The key must match the validated option name exactly, otherwise the
    # caller's retry count is never picked up.
    my $retries       = delete $opts{nacltask_log_retries}   || 3;
    my $poll_interval = delete $opts{nacltask_poll_interval} || 10;
    my $is_fanta      = delete $opts{nacltask_is_fanta};
    my $aggrs         = delete $opts{sfo_aggrs};
    my $detector      = delete $opts{detector};
    my $takeover_type = delete $opts{takeover_type};

    my %fanta_ems = (
        SIMBOX => {
            sfo_aggr_ems => [
                'ha.takeover.stateChng', 'sfo.takeover.sfoStart',
                'sfo.takeover.relocDone'
            ],
            non_sfo_aggr_ems => ['sfo.takeover.bypassed'],
        },
        HARDWARE => {
            sfo_aggr_ems => [
                'ha.takeover.stateChng',  'sfo.takeover.sfoStart',
                'sfo.takeover.relocDone', 'cf.transition.summary'
            ],
            non_sfo_aggr_ems =>
              [ 'sfo.takeover.bypassed', 'cf.transition.summary' ],
        },
    );

    # Trap text expected in the logs for each hwassist takeover type.
    my %hwassist_ems = (
        system_powercycle => 'power_cycle_via_',
        system_power_off  => 'power_off_via_',
        system_reset      => 'reset_',
        watchdog_reset    => 'watchdog_reset',
    );

    my @common_ems = (
        'cf.fm.takeoverStarted', 'cf.fm.takeoverDuration',
        'cf.fm.takeoverComplete'
    );
    my $hwassist_flag = 0;
    my @ems_array     = ();

    if ($is_fanta) {
        my $version_manager = $self->node->get_version_manager();
        my $modelinfo =
          $version_manager->get_version_attribute( attribute => 'modelinfo' );
        my $model    = $modelinfo =~ m[simbox]i ? 'SIMBOX' : 'HARDWARE';
        my $sfo_aggr = @$aggrs ? 'sfo_aggr_ems' : 'non_sfo_aggr_ems';
        push @ems_array, @{ $fanta_ems{$model}->{$sfo_aggr} };
    }

    if ( exists $hwassist_ems{$takeover_type} ) {
        $hwassist_flag = 1;
        push @ems_array, 'cf.hwassist.takeoverTrapRecv';
        push @ems_array, 'cf.fsm.stateTransit';
    }

    push @ems_array, @common_ems;

    my $found       = 1;
    my $buffer      = undef;
    my @missing_ems = ();
    my $msgs        = undef;

    while ($retries) {
        try {
            $Log->debug("Retry is: $retries");
            $msgs = $detector->get_logs(
                skip_cached_logs     => 1,
                command_output_level => 'trace'
            );
        }
        catch NACL::APISet::Exceptions::TimeoutException with {

            # Use whatever output was received before the timeout and give
            # the detector more time for the next attempt.
            my $exception = shift;
            $msgs = $exception->output();
            $detector->nacltask_timeout( $detector->nacltask_timeout + 600 );
        };
        $found       = 1;
        $buffer      = undef;
        @missing_ems = ();
        foreach my $ems (@ems_array) {

            # The log text is searched for the message name with '_' in
            # place of '.'; work on a copy so that @ems_array is left
            # untouched.
            ( my $tag = $ems ) =~ s/\./_/g;
            if ( $msgs =~ m[$tag]i ) {
                $found = 0
                  if ( $hwassist_flag
                    && $msgs !~ m[$hwassist_ems{$takeover_type}]i );
                $buffer .= "$1\n\n" while ( $msgs =~ m[<(LR.*?$tag.*?)/>]gsi );
            }
            else {
                $found = 0;
                push @missing_ems, $tag;
            }
        }

        last if ( $retries && $found );
        $retries--;
        Tharn::snooze $poll_interval;
    }

    if ( !$retries ) {
        $Log->exit() if $may_exit;
        NACL::Exceptions::EventCheckFailure->throw(
            'EMS Not Found : ' . Dumper( \@missing_ems ) );
    }

    $Log->trace( '-------------EMS logs----------------' . "\n"
          . ( defined $buffer ? $buffer : '' ) );
    $Log->exit() if $may_exit;
}

## _event_collector
##
## Endless polling loop that accumulates EMS events reported by an
## EventLogDetector and hands them over to $Parent on request.
##
## Options:
##   detector - (required) NACL::MTask::EventLogDetector object to poll.
##              Extra options are accepted and ignored.
##
## The sub never returns on its own: it keeps looping until the process is
## stopped from outside.  When a message of type "stop_process" is seen by
## the registered listener, everything collected so far is sent to $Parent
## as a "transfer_result" message and the listener is removed.
sub _event_collector {
    my ( $self, @args ) = @_;

    my %opts = validate_with(
        params      => \@args,
        spec        => {
            detector => {
                type => OBJECT,
                isa  => 'NACL::MTask::EventLogDetector',
            },
        },
        allow_extra => 1,
    );

    my $detector  = $opts{detector};
    my $node_name = $self->node()->node();

    # Accumulated events, stored as plain hashes (see $as_plain_hash).
    my @all_events = ();

    # Event objects are flattened to plain hashes before being queued,
    # because the objects themselves cannot be sent through IPC.
    my $as_plain_hash = sub {
        my ($event) = @_;
        return {
            messagename => $event->messagename(),
            time        => $event->time(),
            event       => $event->event(),
            seqnum      => $event->seqnum(),
        };
    };

    # The listener is declared first so that its own callback can
    # unregister it once the result has been handed over.
    my $listener;
    $listener = {
        filter   => sub { $_[0]->{type} eq "stop_process" },
        callback => sub {
            $Log->comment("Sending message to Parent");
            $Parent->message_put(
                type    => "transfer_result",
                message => \@all_events
            );
            listener_remove($listener);
        },
    };
    listener_add($listener);

    # Events of interest for both takeover and giveback.
    # NOTE(review): the first two patterns also appear further down the
    # list; how check_for_all_presence treats duplicates is not visible
    # here, so the list is kept exactly as is.
    my $master_event_list = [
        qr/^cf\.fsm\.state.*/,
        qr/^cf\.hwassist\.takeoverTrap.*/,
        qr/^ha\.takeover.*/,
        qr/^sfo\.takeover.*/,
        qr/^cf\.transition.*/,
        qr/^cf\.fsm\.state.*/,
        qr/^cf\.hwassist\.takeoverTrap.*/,
        'ha.takeover.stateChng',
        'sfo.takeover.sfoStart',
        'sfo.takeover.relocDone',
        'sfo.takeover.bypassed',
        'cf.fm.takeoverStarted',
        'cf.fm.takeoverComplete',
        'cf.fm.takeoverDuration',
        'cf.fsm.stateTransit',
        'cf.hwassist.takeoverTrapRecv',
        'cf.fm.givebackStarted',
        'cf.fm.givebackComplete',
        'cf.fm.givebackDuration',
    ];

    while (1) {
        try {
            my @events = $detector->stop(
                node                     => $node_name,
                check_for_all_presence   => $master_event_list,
                ignore_if_unsynchronized => 1
            );

            # Not expected to be reached: the master list covers every
            # takeover type, so a full match is unlikely.  Kept as a
            # safeguard.
            push @all_events, map { $as_plain_hash->($_) } @events;
        }
        catch NACL::Exceptions::EventCheckFailure with {
            my ($failure) = @_;

            # Usual path: only a subset of the master list was found.
            # Keep what matched ...
            my $matched = $failure->matched_events();
            if ( $matched && @$matched ) {
                $Log->comment("matched events: ", Dumper($matched));
                push @all_events, map { $as_plain_hash->($_) } @$matched;
            }

            # ... and look only for the remaining events next time round.
            if ( my $remaining = $failure->unmatched_events() ) {
                $master_event_list = $remaining;
                $Log->comment("**master event list**: @$master_event_list");
            }
        };

        # The next fetch starts where this one ended.
        $detector->event_begin_time($detector->event_end_time());

        eventloop(seconds => 2);
    }
}
## _get_result_and_stop
##
## Asks a collector process for its result and then shuts it down.
##
## Options:
##   proc      - (required) NATE::Process object to query and stop.
##   till_time - (optional) seconds to sleep before asking for the result,
##               giving EMS events time to be generated.
##               Extra options are accepted and ignored.
##
## Returns whatever message_get() yields for the "transfer_result" message.
## Expected shape (reference to an array of hashes), e.g.:
##   [
##     {
##        'time'        => '"11/8/2013 10:28:30"',
##        'seqnum'      => '1798',
##        'messagename' => 'license.db.migrate.vol.success',
##        'event'       => '"license.db.migrate.vol.success: Successfully migrated licenses for volume component_flex_aggr "'
##     },
##     {
##        'time'        => '"11/8/2013 10:58:00"',
##        'seqnum'      => '1882',
##        'messagename' => 'raid.vol.disk.add.done',
##        'event'       => '"raid.vol.disk.add.done: Addition of Disk ... has completed successfully "'
##     },
##   ]
sub _get_result_and_stop {
    my ( $self, @args ) = @_;

    my %opts = validate_with(
        params      => \@args,
        spec        => {
            till_time => { type => SCALAR, optional => 1 },
            proc      => { type => OBJECT, isa => 'NATE::Process' },
        },
        allow_extra => 1,
    );

    my $child = $opts{proc};

    # Optional grace period before the collector is asked to stop.
    sleep( $opts{till_time} ) if $opts{till_time};

    # Tell the child to stop, then read back what it collected.
    $child->message_put( type => "stop_process", message => "", );
    my $collected = $child->message_get( type => "transfer_result" );

    # The child is no longer needed once its result has been received.
    $child->stop;
    $child->destroy;

    return $collected;
}

## _execute_with_gather_diag
##
## Invokes a method on $self and, if it throws anything, decorates the
## error with the storage-failover arguments before rethrowing it.
##
## Options:
##   method      - (required) name of the method to call on $self.
##   exception   - (required) scalar on which ->convert() is invoked;
##                 presumably an exception class name -- confirm with callers.
##   method_args - (optional) hash reference of arguments passed to the
##                 method as a flat list.
##
## On success the value of the method call is the value of this sub, since
## the try statement is the last one evaluated.  On failure the original
## error object is rethrown after the processing below.
sub _execute_with_gather_diag {
    my ( $self, @opts ) = @_;

    my %opts = validate_with(
        params => \@opts,
        spec   => {
            method      => { type => SCALAR },
            exception   => { type => SCALAR },
            method_args => { type => HASHREF, optional => 1 },
        },
    );

    my ( $method_name, $target ) = @opts{qw(method exception)};
    my %sfo_args =
      defined $opts{method_args} ? %{ $opts{method_args} } : ();

    try {
        $self->$method_name(%sfo_args);
    }
    otherwise {
        my ($caught) = @_;

        # Work on a deep copy of the caught error.
        my $copy = thaw( freeze($caught) );

        # Convert the copy via $target (presumably reblessing the copy
        # into the class named by $target -- confirm against convert()).
        $target->convert( exception => $copy );

        # Record the node and the call arguments on the copy.
        $sfo_args{command_interface} = $self->node();
        $copy->sfo_args(%sfo_args);

        # NOTE(review): make_base_of() appears to graft the copy's state
        # onto the caught error, which is the object actually rethrown.
        $copy->make_base_of($caught);
        $caught->throw();
    };
}
# A module file must evaluate to a true value so that `require`/`use` succeeds.
1;