#
# Copyright (c) 2014 NetApp, Inc., All Rights Reserved
#   Any use, modification, or distribution is prohibited
#   without prior written consent from NetApp, Inc.
#

package NACL::MTask::ScaledPopulation;
use Moose;

use Tharn;
use NATE::Log qw(log_global);
my $Log = log_global();
my $may_enter = $Log->may_enter();
my $may_exit  = $Log->may_exit();
use NATE::BaseException qw(:try);
use List::Util qw(min max);

=head1 NAME

NACL::MTask::ScaledPopulation

=head1 SYNOPSIS

    use NACL::MTask::ScaledPopulation;
    use NACL::MTask::DataSet;

    my @Basic_Datasets = (
        'NACL::MTask::DataSet::QtreeBasic',
        'NACL::MTask::DataSet::VBNBADBasic',
        'NACL::MTask::DataSet::FileReservationBasic',
        'NACL::MTask::DataSet::HardlinkBasic',
    );

    my $scaled_populate = NACL::MTask::ScaledPopulation->new(
        volume => $volume,
        clients => [$topology->clients()],
        dataset_pkg_list => [ @Basic_Datasets ],
        fast_mode => 1,
    );
    $scaled_populate->populate();

    # Find the DataSets that were populated on the volume
    my @datasets = NACL::MTask::DataSet->find(
        volume => $volume,
        clients => [$topology->clients()],
    );

    # Validate the DataSets returned for up to 10 minutes
    my $end_time = time() + 60*10;
    while (time() < $end_time) {
        my $data_set = shift @datasets;
        push @datasets, $data_set;

        $data_set->validate();
    }

=head1 DESCRIPTION

Please read L<NACL::MTask::DataSet|lib-NACL-MTask-DataSet-pm> to understand the
general behavior and capabilities of DataSet libraries.

ScaledPopulation helps the caller to write testcases that appropriately scale
to the available hardware. Testcases specify a desired duration and a desired
size. The desired size will usually be computed by taking a fraction of the
available space on the volume. The library will populate until both the desired
duration and desired size have been met.

To prevent the testcase from crashing or taking too long, some maximum durations
and maximum population sizes will be automatically computed or can be specified
by the caller. The max duration will prevent population from taking an excessive
amount of time if the filer is slow because of a debug build, vsim limitations,
or running in suspend/restart mode. The maximum size will prevent the population
from taking too much space on the volume and ensures that a reasonable amount of
space remains for the caller.

Any DataSets created on the volume can be found using
NACL::MTask::DataSet->find().

=head1 LIMITATIONS

This library currently cannot be safely called from multiple tests running at
the same time on the same volume. The maximum_population_bytes value is also
not currently a hard restriction on the amount of space taken, so configuring
it near the available size may cause the library to exceed the volume's
capacity and fail.

=head1 ATTRIBUTES

=head2 maximum_population_duration

The duration in seconds that we should avoid exceeding for population time. The
library will attempt to avoid exceeding this, but long running DataSets may end
up causing this to be exceeded.

=head2 maximum_population_bytes

The maximum number of bytes that we should populate to the volume. The library
will attempt to avoid exceeding this, but DataSets don't currently tell us how
much space they will use before they populate. If the DataSets eventually
provide an upper bound, the library will be able to guarantee that the maximum
is not exceeded.

=head2 desired_population_bytes

The number of bytes that we should attempt to populate on the volume. Population
will keep going until this value is hit, but stop after either maximum is hit.
The library may not reach the number of desired bytes if the system is very
slow and the maximum_population_duration is hit.

=head2 desired_population_duration

The number of seconds that the library should attempt to populate for. This may
not be reached if the system is very fast and maximum_population_bytes is hit
before the desired duration is met.

=head2 volume

A L<NACL::C::Volume|lib-NACL-C-Volume-pm> instance of the volume to populate on.

=head2 clients

An array reference of L<NACL::C::Client|lib-NACL-C-Client-pm> instances to use
for populating the NACL::MTask::DataSet instances. At least one Windows client
and one Linux client are required for most DataSets.

=head2 dataset_pkg_list

An array reference of NACL::MTask::DataSet package names to use for the
population.

=cut

# Upper bounds on the population. Both are optional at construction time; the
# predicates report whether a value has been set, which BUILD uses to decide
# whether it has to derive one.
has maximum_population_duration => (
    isa       => 'Int',
    is        => 'rw',
    predicate => 'has_maximum_population_duration',
);

has maximum_population_bytes => (
    isa       => 'Int',
    is        => 'rw',
    predicate => 'has_maximum_population_bytes',
);

# Targets the population tries to reach: 10MB and 30 seconds unless the caller
# says otherwise.
has desired_population_bytes => (
    isa     => 'Int',
    is      => 'rw',
    default => 10 * 1024 * 1024,
);

has desired_population_duration => (
    isa     => 'Int',
    is      => 'rw',
    default => 30,
);

# The volume to populate and the clients used to populate it. Both must be
# supplied by the caller and cannot be replaced afterwards.
has volume => (
    isa      => 'Object',
    is       => 'ro',
    required => 1,
);

has clients => (
    isa      => 'ArrayRef',
    is       => 'ro',
    required => 1,
);

# Names of the DataSet packages to populate with. DataSets share a minimal
# interface but may offer optional enhanced ones, and callers should not be
# prevented from using those. That calls for an easy mode, where this library
# builds the DataSet call for the caller, and an advanced mode, where the
# caller builds the call and hands over a subref to invoke.
has dataset_pkg_list => (
    isa      => 'ArrayRef[Str]',
    is       => 'rw',
    required => 1,
);

# These are private attributes

# Maps a DataSet package name to a hash holding its 'times_used' counter.
has dataset_usage_tracker => (
    isa     => 'HashRef',
    is      => 'rw',
    default => sub { return {}; },
);

# When true, BUILD replaces the desired/maximum limits with small fixed values
# and the flag is forwarded to every DataSet that gets created.
has fast_mode => (
    isa     => 'Int',
    is      => 'rw',
    default => 0,
);


=head1 METHODS

Please read L<NACL::MTask::DataSet|lib-NACL-MTask-DataSet-pm> to understand the
general behavior and capabilities of DataSet libraries.

=head2 new


(Static method) Instantiate a ScaledPopulation library that will create DataSets
on a volume based on a configuration.

=over

Options

=over

=item C<< clients => [ NACL::C::Client instances ] >>

(Required) A list of clients to use for population. It should have at least
one Linux and one Windows client.

=item C<< volume => NACL::C::Volume instance >>

(Required) The NACL::C::Volume instance of the volume to populate on.

=item C<< desired_population_duration >>

(Optional) The amount of time in seconds to populate until. See the ATTRIBUTES
section for more detail.

=item C<< desired_population_bytes >>

(Optional) Keep populating until this number of bytes has been created. See the
ATTRIBUTES section for more detail.

=item C<< maximum_population_duration >>

(Optional) The maximum number of seconds to populate for. One will be computed
based on desired_population_duration if not specified. See the ATTRIBUTES
section for more detail.

=item C<< maximum_population_bytes >>

(Optional) The maximum number of bytes to populate. One will be computed based
on desired_population_bytes if not specified. See the ATTRIBUTES section for
more detail.

=back

=over

Returns an instance of this object.

=back

=back

=cut
# Moose post-construction hook. Loads the configured DataSet packages, applies
# the fast_mode presets, derives whichever maximum the caller did not supply and
# initializes the usage counter of every DataSet package.
sub BUILD {
    my ($self) = @_;

    $self->_use_pkgs();

    # fast_mode replaces all four limits with small fixed values. Because the
    # maximums are set here, the derivation blocks below are skipped.
    if ($self->fast_mode()) {
        $self->desired_population_duration(15);
        $self->maximum_population_duration(30);
        $self->desired_population_bytes(1024*1024*10);
        $self->maximum_population_bytes(1024*1024*20);
    }

    if (!$self->has_maximum_population_bytes()) {
        my $vol_state     = $self->volume()->state();
        my $desired_bytes = $self->desired_population_bytes();

        # Allow 25% on top of the desired size. Truncate so that the value
        # that gets logged is a whole number of bytes.
        my $extra_bytes = int(0.25*$desired_bytes);

        # 25% will be very small if the desired size is tiny, make sure it is
        # at least 10MB.
        $extra_bytes = max($extra_bytes,1024*1024*10);

        # If the proposed desired size is 10TB, 25% will be a large amount. Make
        # sure we don't go too crazy with the maximum amount. Limit the
        # extra_bytes to 10GB.
        $extra_bytes = min(1024*1024*1024*10,$extra_bytes);

        my $proposed_max_bytes = $desired_bytes+$extra_bytes;

        # Ensure that maximum_population_bytes doesn't exceed the available
        # bytes. We will go up to 90% of the available space until we can set
        # tighter bounds on this.
        $proposed_max_bytes = int(min(
            $proposed_max_bytes,
            $vol_state->available()*.9
        ));

        $Log->comment("Setting maximum_population_bytes for volume ".
            $self->volume()->volume()." to $proposed_max_bytes ($extra_bytes ".
            "above the desired byte count)"
        );
        $self->maximum_population_bytes($proposed_max_bytes);
    }

    if (!$self->has_maximum_population_duration()) {
        my $desired_duration = $self->desired_population_duration();

        # maximum_population_duration is an Int, so the 25% allowance has to be
        # truncated; a fractional value would be rejected by the type
        # constraint.
        my $extra_duration = int(0.25*$desired_duration);

        # 25% will be very small if the duration is 15 seconds. Set the maximum
        # duration to at least 60 seconds beyond the desired duration.
        $extra_duration = max(60, $extra_duration);

        # Limit the extra duration to at most 30 minutes.
        $extra_duration = min(60*30, $extra_duration);

        # Compute the sum before building the message: "." and "+" share the
        # same precedence, so adding inside the concatenation chain would
        # numify the text that precedes the "+".
        my $proposed_max_duration = $desired_duration+$extra_duration;

        $Log->comment("Setting maximum_population_duration for volume ".
            $self->volume()->volume()." to $proposed_max_duration".
            " ($extra_duration above the desired population duration)"
        );
        $self->maximum_population_duration($proposed_max_duration);
    }

    # Every configured DataSet package starts with a usage count of zero.
    foreach my $dataset (@{$self->dataset_pkg_list()}) {
        $self->dataset_usage_tracker()->{$dataset} = {
            times_used => 0,
        };
    }
}

=head2 populate

    $scaled_population->populate();

(Instance method) Do the population as specified in the constructor. Block until
the population is complete.

=cut
# Create DataSets on the volume, one package at a time in round-robin order,
# until the desired size and duration have both been reached or one of the
# maximums has been exceeded. Prints the per-DataSet usage counts when done.
sub populate {
    my ($self) = @_;

    # While there is time remaining
    # While there is space remaining
    #   Use "next" dataset
    #   If this dataset can be done in the time available
    #   If this dataset can be done in the space available
    my $bytes_populated = 0;
    my $start_time = time();

    while ($self->_can_populate($bytes_populated,time()-$start_time) &&
            $self->_should_populate($bytes_populated,time()-$start_time)) {
        my $pre_populate_available = $self->volume()->state()->available();

        # Ideally, we would support asking the DataSet how much space it
        # intends to consume. If the DataSet modules implemented this
        # correctly, we could avoid running over the maximum. The current
        # implementation implements a "soft" maximum and may overrun if the
        # DataSet is large.
        my $dataset_pkg = $self->_next_dataset_pkg();
        my $dataset = $dataset_pkg->create(
            volume => $self->volume(),
            clients => $self->clients(),
            fast_mode => $self->fast_mode(),
        );
        $self->_track_dataset_usage($dataset_pkg);
        my $post_populate_available = $self->volume()->state()->available();

        # Alternatively, we could ask the DataSet how much it populated. This
        # would be required if we are going to do parallel population with
        # multiple threads/NATE::Process instances. The DataSet might do this
        # by creating a Qtree and using a tracking Quota on that Qtree.
        $Log->trace("$dataset_pkg: pre available: $pre_populate_available ".
            "post available: $post_populate_available delta: ".
            ($post_populate_available-$pre_populate_available)
        );

        # Populating consumes space, so the available byte count shrinks. The
        # bytes written are therefore "pre - post"; accumulating "post - pre"
        # would drive the total negative and neither the desired nor the
        # maximum byte count would ever be reached.
        $bytes_populated +=
            ($pre_populate_available - $post_populate_available);
    }
    $self->_print_dataset_usage();
}

# These are private methods

# Track the number of times each DataSet was populated to the volume. We may
# eventually want to support a mode where each DataSet is populated once or we
# pick certain DataSets with bias. Printing out the number of times each DataSet
# was populated is helpful for the user to understand what is on the volume,
# something required for filing many burts.
# Record one more population of the given DataSet package.
#   $dataset - key into dataset_usage_tracker; populate() passes the DataSet
#              package name.
# Returns $self.
sub _track_dataset_usage {
    my ($self, $dataset) = @_;

    my $usage_for = $self->dataset_usage_tracker();
    $usage_for->{$dataset}->{times_used}++;

    return $self;
}

# Log how many times each DataSet package was populated on the volume.
# The tracker is read through its accessor, like everywhere else in this
# package, and the packages are listed in sorted order so the log output is the
# same from run to run.
# Returns $self.
sub _print_dataset_usage {
    my ($self) = @_;

    my $usage_for = $self->dataset_usage_tracker();

    $Log->comment("DataSet usage count for volume ".$self->volume()->volume());
    foreach my $dataset_key (sort keys %{$usage_for}) {
        $Log->comment("$dataset_key was used ".
            $usage_for->{$dataset_key}->{times_used}.
            " times "
        );
    }

    return $self;
}

# We need to make sure the package string has been 'used'
# Make sure every package named in dataset_pkg_list has been 'used'.
# Throws a NATE::BaseException if a name is not a syntactically valid package
# name or if loading the package fails.
# Returns $self.
sub _use_pkgs {
    my ($self) = @_;

    foreach my $pkg (@{$self->dataset_pkg_list()}) {
        # The name is interpolated into a string eval below, so refuse
        # anything that is not a plain Some::Package::Name; otherwise arbitrary
        # code embedded in the string would be executed.
        if ($pkg !~ m{\A [A-Za-z_]\w* (?: :: \w+ )* \z}xms) {
            NATE::BaseException->throw(
                "while using $pkg hit invalid package name"
            );
        }

        # String eval is needed because the package name is only known at
        # runtime. The trailing "1" lets us detect failure from the return
        # value instead of relying on $@ alone.
        my $loaded = eval "use $pkg; 1;";

        if (!$loaded) {
            my $error = $@ || 'unknown error';
            NATE::BaseException->throw("while using $pkg hit $error");
        }
    }
    return $self;
}

# Return the name of the next dataset package by doing a round-robin over the
# list
# Return the name of the next dataset package by doing a round-robin over the
# list. The list held by dataset_pkg_list is rotated in place: the head is
# removed, appended to the tail and returned.
# Throws a NATE::BaseException if the list is empty, rather than pushing undef
# into the list and failing later when a method is called on it.
sub _next_dataset_pkg {
    my ($self) = @_;

    my $pkg_list = $self->dataset_pkg_list();
    if (!@{$pkg_list}) {
        NATE::BaseException->throw(
            "dataset_pkg_list is empty, there is no DataSet package to ".
            "populate with"
        );
    }

    my $dataset_pkg = shift @{$pkg_list};
    push @{$pkg_list}, $dataset_pkg;

    return $dataset_pkg;
}

# Report whether population is still allowed.
#   $bytes_populated - bytes populated so far
#   $time_used       - seconds spent populating so far
# Returns 0 once either value is strictly greater than its maximum, 1 otherwise.
sub _can_populate {
    my ($self, $bytes_populated, $time_used) = @_;

    return 0 if $bytes_populated > $self->maximum_population_bytes();
    return 0 if $time_used > $self->maximum_population_duration();

    return 1;
}

# Report whether more population is still wanted.
#   $bytes_populated - bytes populated so far
#   $time_used       - seconds spent populating so far
# Returns 1 while either value is strictly below its desired amount, and 0 once
# both the desired byte count and the desired duration have been reached.
sub _should_populate {
    my ($self, $bytes_populated, $time_used) = @_;

    return 1 if $bytes_populated < $self->desired_population_bytes();
    return 1 if $time_used < $self->desired_population_duration();

    return 0;
}


1;