#
# Copyright (c) 2014 NetApp, Inc., All Rights Reserved
# Any use, modification, or distribution is prohibited
# without prior written consent from NetApp, Inc.
#

package NACL::MTask::ScaledPopulation;

use Moose;
use Tharn;
use NATE::Log qw(log_global);
my $Log = log_global();
my $may_enter = $Log->may_enter();
my $may_exit = $Log->may_exit();

use NATE::BaseException qw(:try);
use List::Util qw(min max);

=head1 NAME

NACL::MTask::ScaledPopulation

=head1 SYNOPSIS

    use NACL::MTask::ScaledPopulation;
    use NACL::MTask::DataSet;

    my @Basic_Datasets = (
        'NACL::MTask::DataSet::QtreeBasic',
        'NACL::MTask::DataSet::VBNBADBasic',
        'NACL::MTask::DataSet::FileReservationBasic',
        'NACL::MTask::DataSet::HardlinkBasic',
    );

    my $scaled_populate = NACL::MTask::ScaledPopulation->new(
        volume => $volume,
        clients => [$topology->clients()],
        dataset_pkg_list => [ @Basic_Datasets ],
        fast_mode => 1,
    );
    $scaled_populate->populate();

    # Find the DataSets that were populated on the volume
    my @datasets = NACL::MTask::DataSet->find(
        volume => $volume,
        clients => [$topology->clients()],
    );

    # Validate the DataSets returned for up to 10 minutes
    my $end_time = time() + 60*10;
    while (time() < $end_time) {
        my $data_set = shift @datasets;
        push @datasets, $data_set;
        $data_set->validate();
    }

=head1 DESCRIPTION

Please read L<NACL::MTask::DataSet> to understand the general behavior and
capabilities of DataSet libraries.

ScaledPopulation helps the caller to write testcases that appropriately scale to
the available hardware. Testcases specify a desired duration and a desired size.
The desired size will usually be computed by taking a fraction of the available
space on the volume. The library will populate until both the desired duration
and desired size have been met. To prevent the testcase from crashing or taking
too long, some maximum durations and maximum population sizes will be
automatically computed or can be specified by the caller.
The max duration will prevent population from taking an excessive amount of time
if the filer is slow because of a debug build, vsim limitations, or running in
suspend/restart mode. The maximum size will prevent the population from taking
too much space on the volume and ensure that a reasonable amount of space
remains for the caller.

Any DataSets created on the volume can be found using
NACL::MTask::DataSet->find().

=head1 LIMITATIONS

This library currently cannot be safely called from multiple tests running at
the same time on the same volume.

The maximum is also not currently a hard restriction on the amount of space
taken, so configuring it near available size may cause the library to exceed the
volume's capacity and fail.

=head1 ATTRIBUTES

=head2 maximum_population_duration

The duration in seconds that we should avoid exceeding for population time. The
library will attempt to avoid exceeding this, but long-running DataSets may end
up causing this to be exceeded.

=head2 maximum_population_bytes

The maximum number of bytes that we should populate to the volume. The library
will attempt to avoid exceeding this, but DataSets don't currently tell us how
much space they will use before they populate. If an upper bound is eventually
provided by the DataSet, we can guarantee the maximum will not be exceeded.

=head2 desired_population_bytes

The number of bytes that we should attempt to populate on the volume. Population
will keep going until this value is hit, but stop after either maximum is hit.
The library may not reach the number of desired bytes if the system is very slow
and the maximum_population_duration is hit.

=head2 desired_population_duration

The number of seconds that the library should attempt to populate for. This may
not be reached if the system is very fast and maximum_population_bytes is hit
before the desired duration is met.

=head2 volume

An L<NACL::C::Volume> instance of the volume to populate on.
=head2 clients

An array reference of L<NACL::C::Client> instances to use for populating the
NACL::MTask::DataSet instances. At least one Windows client and one Linux
client are required for most DataSets.

=head2 dataset_pkg_list

An array reference of NACL::MTask::DataSet package names to use for the
population.

=head2 fast_mode

When true, the constructor replaces the desired/maximum duration and byte
settings with small fixed values, and the flag is forwarded to every DataSet
that gets created. Defaults to 0.

=cut

# Optional upper bounds. Each one gets a has_* predicate so that BUILD can tell
# "not supplied by the caller" apart from an explicit value.
has $_ => (
    isa       => 'Int',
    is        => 'rw',
    predicate => "has_$_",
) for qw(maximum_population_duration maximum_population_bytes);

# Population targets: 10MB and 30 seconds unless the caller says otherwise.
has desired_population_bytes => (
    isa     => 'Int',
    is      => 'rw',
    default => 1024*1024*10,
);

has desired_population_duration => (
    isa     => 'Int',
    is      => 'rw',
    default => 30,
);

# Where to populate and which clients to drive the population with.
has volume => (
    isa      => 'Object',
    is       => 'ro',
    required => 1,
);

has clients => (
    isa      => 'ArrayRef',
    is       => 'ro',
    required => 1,
);

# DataSets share a minimal interface but may offer richer, optional ones, and
# callers of this library must not be locked out of those. That calls for an
# easy mode, where this library builds the DataSet call on the caller's behalf,
# and an advanced mode, where the caller builds the call and hands over a
# subref to invoke. The package-name list below is the easy mode.
has dataset_pkg_list => (
    isa      => 'ArrayRef[Str]',
    is       => 'rw',
    required => 1,
);

# Internal bookkeeping: package name => { times_used => N }.
has dataset_usage_tracker => (
    isa     => 'HashRef',
    is      => 'rw',
    default => sub { +{} },
);

# Quick-run switch; see the fast_mode entry in the ATTRIBUTES section.
has fast_mode => (
    isa     => 'Int',
    is      => 'rw',
    default => 0,
);

=head1 METHODS

Please read L<NACL::MTask::DataSet> to understand the general behavior and
capabilities of DataSet libraries.

=head2 new

(Static method) Instantiate a ScaledPopulation library that will create DataSets
on a volume based on a configuration.

=over

Options

=over

=item C<< clients => [ NACL::C::Client instances ] >>

(Required) A list of clients to use for population, it should have at least one
Linux and one Windows client.

=item C<< dataset_pkg_list => [ DataSet package names ] >>

(Required) The NACL::MTask::DataSet package names to populate with.

=item C<< fast_mode >>

(Optional) Populate with small, fixed durations and sizes. See the ATTRIBUTES
section for more detail.

=item C<< volume => NACL::C::Volume instance >>

(Required) The NACL::C::Volume instance of the volume to populate on.
=item C<< desired_population_duration >>

(Optional) The amount of time in seconds to populate until. See the
ATTRIBUTES section for more detail.

=item C<< desired_population_bytes >>

(Optional) Keep populating until this number of bytes has been created. See the
ATTRIBUTES section for more detail.

=item C<< maximum_population_duration >>

(Optional) The maximum number of seconds to populate for. One will be computed
based on desired_population_duration if not specified. See the ATTRIBUTES
section for more detail.

=item C<< maximum_population_bytes >>

(Optional) The maximum number of bytes to populate. One will be computed
based on desired_population_bytes if not specified. See the ATTRIBUTES section
for more detail.

=back

=over

Returns an instance of this object.

=back

=back

=cut

# Moose construction hook. Loads every DataSet package, applies the fast_mode
# presets, derives whichever maximum the caller did not supply and zeroes the
# per-DataSet usage counters.
sub BUILD {
    my ($self) = @_;

    $self->_use_pkgs();

    # fast_mode overrides any size/duration handed to the constructor. Because
    # it sets both maximums, the two derivation branches below are skipped.
    if ($self->fast_mode()) {
        $self->desired_population_duration(15);
        $self->maximum_population_duration(30);
        $self->desired_population_bytes(1024*1024*10);
        $self->maximum_population_bytes(1024*1024*20);
    }

    if (!$self->has_maximum_population_bytes()) {
        my $vol_state = $self->volume()->state();
        my $extra_bytes = int(0.25*$self->desired_population_bytes());

        # 25% will be very small if the volume is tiny, make sure it is at least
        # 10MB.
        $extra_bytes = max($extra_bytes, 1024*1024*10);

        # If the proposed desired size is 10TB, 25% will be a large amount. Make
        # sure we don't go too crazy with the maximum amount. Limit the
        # extra_bytes to 10GB.
        $extra_bytes = min(1024*1024*1024*10, $extra_bytes);
        my $proposed_max_bytes = $self->desired_population_bytes()+$extra_bytes;

        # Ensure that maximum_population_bytes doesn't exceed the available
        # bytes. We will go up to 90% of the available space until we can set
        # tighter bounds on this.
        $proposed_max_bytes = int(min(
            $proposed_max_bytes,
            $vol_state->available()*.9
        ));
        $Log->comment("Setting maximum_population_bytes for volume ".
            $self->volume()->volume()." to $proposed_max_bytes ($extra_bytes ".
            "above the desired byte count)"
        );
        $self->maximum_population_bytes($proposed_max_bytes);
    }

    if (!$self->has_maximum_population_duration()) {
        # int() keeps the result valid for the attribute's Int constraint; a
        # desired duration such as 250 would otherwise produce 312.5.
        my $extra_duration = int(0.25*$self->desired_population_duration());

        # 25% will be very small if the duration is 15 seconds. Set the maximum
        # duration to at least 60 seconds beyond the desired duration.
        $extra_duration = max(60, $extra_duration);

        # Limit the extra duration to at most 30 minutes.
        $extra_duration = min(60*30, $extra_duration);

        # Compute the sum before building the message: "." and "+" share the
        # same precedence, so adding inline would numify the text built so far.
        my $proposed_max_duration =
            $self->desired_population_duration() + $extra_duration;
        $Log->comment("Setting maximum_population_duration for volume ".
            $self->volume()->volume()." to $proposed_max_duration".
            " ($extra_duration above the desired population duration)"
        );
        $self->maximum_population_duration($proposed_max_duration);
    }

    foreach my $dataset (@{$self->dataset_pkg_list()}) {
        $self->dataset_usage_tracker()->{$dataset} = {
            times_used => 0,
        };
    }

    return;
}

=head2 populate

    $scaled_population->populate();

(Instance method) Do the population as specified in the constructor. Block until
the population is complete.

=cut

# Creates DataSets round-robin until both desired targets are met or either
# maximum is exceeded, then logs how often each DataSet package was used.
# Returns $self.
sub populate {
    my ($self) = @_;

    my $bytes_populated = 0;
    my $start_time = time();
    while ($self->_can_populate($bytes_populated, time()-$start_time)
        && $self->_should_populate($bytes_populated, time()-$start_time)) {
        my $pre_populate_available = $self->volume()->state()->available();

        # Ideally, we would support asking the DataSet how much space it intends
        # to consume. If the DataSet modules implemented this correctly, we
        # could avoid running over the maximum. The current implementation
        # implements a "soft" maximum and may overrun if the DataSet is large.
        my $dataset_pkg = $self->_next_dataset_pkg();

        # The instance itself is not needed here; the assignment only keeps
        # create() in the same (scalar) calling context as before.
        my $dataset = $dataset_pkg->create(
            volume    => $self->volume(),
            clients   => $self->clients(),
            fast_mode => $self->fast_mode(),
        );
        $self->_track_dataset_usage($dataset_pkg);
        my $post_populate_available = $self->volume()->state()->available();

        # Alternatively, we could ask the DataSet how much it populated. This
        # would be required if we are going to do parallel population with
        # multiple threads/NATE::Process instances. The DataSet might do this by
        # creating a Qtree and using a tracking Quota on that Qtree.
        $Log->trace("$dataset_pkg: pre available: $pre_populate_available ".
            "post available: $post_populate_available delta: ".
            ($post_populate_available-$pre_populate_available)
        );

        # Available space shrinks as data is written, so the bytes consumed are
        # pre minus post. Accumulating post minus pre drives the counter
        # negative and the byte limits would never take effect.
        $bytes_populated +=
            ($pre_populate_available - $post_populate_available);
    }
    $self->_print_dataset_usage();

    return $self;
}

# These are private methods

# Track the number of times each DataSet was populated to the volume. We may
# eventually want to support a mode where each DataSet is populated once or we
# pick certain DataSets with bias. Printing out the number of times each DataSet
# was populated is helpful for the user to understand what is on the volume,
# something required for filing many burts.
# Takes the DataSet package name; returns $self.
sub _track_dataset_usage {
    my ($self, $dataset) = @_;

    $self->dataset_usage_tracker()->{$dataset}->{times_used}++;

    return $self;
}

# Log one line per tracked DataSet package with its usage count. Keys are
# sorted so that the output order is stable between runs. Returns $self.
sub _print_dataset_usage {
    my ($self) = @_;

    my $usage_for = $self->dataset_usage_tracker();
    $Log->comment("DataSet usage count for volume ".$self->volume()->volume());
    foreach my $dataset_key (sort keys %{$usage_for}) {
        $Log->comment("$dataset_key was used ".
            $usage_for->{$dataset_key}->{times_used}.
            " times "
        );
    }

    return $self;
}

# We need to make sure the package string has been 'used'.
# Throws NATE::BaseException if a name is not a plain package name or if
# loading the package fails. Returns $self.
sub _use_pkgs {
    my ($self) = @_;

    foreach my $pkg (@{$self->dataset_pkg_list()}) {
        # The name is interpolated into a string eval below, so refuse anything
        # that is not a bare Package::Name.
        if ($pkg !~ m{\A [A-Za-z_]\w* (?: :: \w+ )* \z}x) {
            NATE::BaseException->throw(
                "'$pkg' is not a valid DataSet package name");
        }

        # String eval is needed because the package is only known at runtime.
        # The trailing "1" makes success detectable without relying on $@.
        if (!eval "use $pkg; 1") {
            NATE::BaseException->throw("while using $pkg hit $@");
        }
    }

    return $self;
}

# Return the name of the next dataset package by doing a round-robin over the
# list: the head of dataset_pkg_list is rotated to the tail and returned.
# Throws NATE::BaseException when the list is empty, rather than rotating an
# undef into it.
sub _next_dataset_pkg {
    my ($self) = @_;

    my $pkg_list = $self->dataset_pkg_list();
    if (!@{$pkg_list}) {
        NATE::BaseException->throw(
            "dataset_pkg_list is empty, there is no DataSet to populate with");
    }

    my $dataset_pkg = shift @{$pkg_list};
    push @{$pkg_list}, $dataset_pkg;

    return $dataset_pkg;
}

# Returns 1 while neither maximum has been exceeded, 0 once the bytes populated
# or the seconds used go past their maximum.
sub _can_populate {
    my ($self, $bytes_populated, $time_used) = @_;

    my $over_limit = $bytes_populated > $self->maximum_population_bytes()
        || $time_used > $self->maximum_population_duration();

    return $over_limit ? 0 : 1;
}

# Returns 1 while at least one desired target (bytes or seconds) is still
# unmet, 0 once both have been reached.
sub _should_populate {
    my ($self, $bytes_populated, $time_used) = @_;

    my $target_unmet = $bytes_populated < $self->desired_population_bytes()
        || $time_used < $self->desired_population_duration();

    return $target_unmet ? 1 : 0;
}

1;