# # Copyright (c) 2012 NetApp, Inc., All Rights Reserved # Any use, modification, or distribution is prohibited # without prior written consent from NetApp, Inc. # ## @summary MTask module to create/manage data tailored for compression testing ## @author rjray@netapp.com ## @status shared ## @pod here package NACL::MTask::CompressibleData; # A combination of $Log->enter() use and NACL-style parameter validation makes # this directive necessary for PCAST/Perl::Critic: ## no critic(RequireArgUnpacking) # We're working in straight ASCII, so we don't need this one either: ## no critic(ProhibitEnumeratedClasses) use strict; use warnings; use base qw(NACL::MTask::MTask); use Fcntl qw(:DEFAULT :seek); use Params::Validate qw(validate validation_options SCALAR ARRAYREF BOOLEAN OBJECT); use Readonly; use NATE::Log qw(log_global); my $Log = log_global(); my $may_enter = $Log->may_enter(); my $may_exit = $Log->may_exit(); use NACL::ComponentUtils qw(_verify_invocation); use NACL::C::Client; use NACL::STask::Volume; use NACL::STask::Client::Mount; use NATE::BaseException qw(:try); use NATE::Exceptions::Argument; # Set the on_fail behavior for Params::Validate to throw an exception (of the # NATE::Exceptions::Argument class) on errors instead of a plain croak(). validation_options( on_fail => sub { NATE::Exceptions::Argument->throw(shift); } ); # Note that all of these elements are all named as private attributes, thus # ensuring that the methods C::MM generates are also named as private. We # will define our own public accessors that are read-only for users to call. # Also, I will be providing an explicit new() constructor, so that users can # use the "visible" accessor names as options, while the data is stored under # the private names. use Class::MethodMaker [ scalar => [ { -type => 'NACL::C::Client' }, '_client', ], scalar => [ { -type => 'NACL::STask::Volume' }, '_volume', ], scalar => [ { -type => 'NACL::STask::Client::Mount', -forward => [ 'mount_point' ], }, '_mount', ], hash => '_files', scalar => '_preserve_mount', scalar => '_preserve_files', scalar => '_local_client_obj', scalar => '_local_mount_obj', ]; # These are the publically-visible accessors, all read-only: for my $method (qw(client volume mount)) { ## no critic(ProhibitNoStrict) no strict 'refs'; my $private = "_$method"; *{$method} = sub { $Log->enter() if $may_enter; my $self = shift; $Log->exit() if $may_exit; return $self->$private(); }; } # This is used as an internal table to make sure that there are not two # objects pointing to the same host/volume combination at the same time: my %OBJECT_ID_MAP = (); # This is used by _write_to_file() as a buffer of all zero-bytes: Readonly my $ZEROBUF => chr(0) x 4096; =head1 NAME NACL::MTask::CompressibleData - Create data for predictable compression =head1 SYNOPSIS use NACL::STask::Volume; use NACL::MTask::CompressibleData; # Create a volume to write to: my $volume = NACL::STask::Volume->create( command_interface => $node, vserver => $vserver, aggregate => $aggr, type => 'RW', 'unix-permissions' => '---rwxrwxrwx', nacltask_wait => 1 ); # Mount the volume on the server side: $volume->mount('junction-path' => '/' . $volume->volume); # Now create the CompressibleData handle-- this will create # the mount between the filer and localhost for us: my $compressible = NACL::MTask::CompressibleData->new( volume => $volume ); # Using the handle, create some files that will compress in # predictable ways: # Create a file that compresses from 8 blocks to 3: $compressible->file_create( file => 'from8to3', pattern => 'H V1 V2 V3 T T T T' ); # Create a file that compresses from 6 to 3, with zero'd # blocks in positions 7 and 8: $compressible->file_create( file => 'from6to3with0s', pattern => 'H V1 V2 V3 T T 0 0' ); =head1 DESCRIPTION The B class is a utility class that enables the creation of files with semi-random data that will compress in predictable ways. It takes a B object and a B object (or a B) and creates a mount-point on the client that mounts the volume. Then arbitrary files can be created according to patterns that the user provides. Creating and manipulating files is the main focus of this class. When the user makes a call to create a file, they provide a file name and a pattern that represents what the file should look like I compression is applied. The user does not need to worry about what the file will look like before compression, the module handles that for them. An object of this class will keep track of the files that it has created. Files that it has created, it will also delete if and when requested. Files that it has not created, it will not delete. This class is not meant to act as a file manager and thus does not have facilities for creating/removing directories, etc. If needed, directories can be created using classes such as B. Upon destruction of the object, the mount-point and/or files may be purged, depending on the user's choices when creating the object. For now, this class assumes that it is operating in a Unix/Linux environment, with the client that it uses being the same host that the test itself is running on (that is, the client object is connected to C). When file operations are available as remote operations through NACL client objects, this class may be extended to work remotely. Only one object may exist for a given client/volume pair at a given time. This is to prevent one object from purging the mount-point while a second object expects it to still exist. =head1 ATTRIBUTES The following attributes are publically-available on each instance of this class. Note that these are read-only; once an object of this class is created, none of these values can be changed. Also note that C, C and C return NACL objects, which means you can break things if you are not careful. For example, if you purge the volume while an object of this class is still active, things will break. =head2 client Returns the B object used to create the CompressibleData object. If none was passed during creation, then this returns the B object that was created internally. =head2 volume Returns the B object used to create the CompressibleData object. If the CompressibleData object was created with a B object, it will have been promoted to an STask object internally, and that STask is what this attribute will return. =head2 mount Returns the B object being used internally by the object. If the CompressibleData object was created with a user-provided B object, it will have been promoted to an STask object internally, and that STask is what this attribute will return. =head2 mount_point This is a short-cut to calling C<< $self-Emount-Emount_point >>. It returns the physical mount-point on the client host, where the volume is mounted. =head1 METHODS In addition to the attributes above, the following methods are made available by this class. =head2 new # Basic construction: $compressible = NACL::MTask::CompressibleData->new( volume => $volume_object ); # Create an object that preserves the mount-point # upon destruction: $compressible = NACL::MTask::CompressibleData->new( volume => $volume_object, preserve_mount => 1 ); # Create an object with explicit, pre-created client # and mount-point: $compressible = NACL::MTask::CompressibleData->new( client => $client_object, volume => $volume_object, mount => $mount_object ); (May only be called as a class method.) B creates a new object of this class and returns a reference to it. The only required argument is the C parameter, as the client defaults to C and the mount-point can be created if it is not provided. =over =item Options =over =item C<< client =E $client_object >> (Optional) An object of the B class. If not passed, then an object will be created with C as the client-host. =item C<< volume =E $volume_object >> (Required) An object of the B class (alternately, an object of the B class, which will be promoted to STask). This is the volume onto which all files will be written. =item C<< mount =E $mount_object >> (Optional) An object of the B class (or an object of the B class, which will be promoted to STask). This object represents the mounting of the volume onto the client. If not passed, one will be created internally. The user may pass their own mount object if, for example, they want the mount to persist even after this object is destroyed. =item C<< preserve_mount =E 0|1 >> (Optional, default is 0) If the mount object is created internally, then it is purged when the object is destroyed. This will also unmount the volume from the client. If you want the mount to stay in place after the destruction of the CompressibleData object, pass this option with a non-false value. If the user passes in their own mount object with the C option, then this flag is ignored. A user-provided mount-point will never be purged. =item C<< preserve_files =E 0|1 >> (Optional, default is 1) If C is false, then deleting the CompressibleData object also purges all the files it created from the volume. The default is 1 (C), to keep all the files intact. The user may set this to a false value if they want the files to be deleted when this object is. =back =item Exceptions If B creates the mount-point for the user, it is possible that the call to create the mount-point might throw an exception. If you attempt to create more than one CompressibleData object that links the same client and volume, the second one will fail. An exception of the class B will be thrown. =back =cut ############################################################################### # # Sub Name: new # # Description: Class constructor. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $class in scalar Class to bless into # @opts in list Additional options, converted # to %opts by validate() # # Returns: Success: new object of this class # Failure: throws exception # ############################################################################### sub new { $Log->enter() if $may_enter; my ($class, @opts) = @_; # This must me a static method invocation: $class->_verify_invocation(style => 'static_only'); my %opts = validate( @opts, { client => { type => OBJECT, isa => 'NACL::C::Client', optional => 1, }, volume => { type => OBJECT, isa => 'NACL::C::Volume', }, mount => { type => OBJECT, isa => 'NACL::C::Client::Mount', optional => 1, }, preserve_mount => { # This is treated as a boolean, but I want the default to be # 0 rather than undef. Just in case I'd trigger any "use of # undefined value" warnings. type => BOOLEAN, default => 0, }, preserve_files => { type => BOOLEAN, default => 1, }, } ); # A flag, whether we created the client object or not my $local_client_obj = 0; # Since we only operate on localhost currently, "client" is optional and # can be created here if it isn't provided: if (! $opts{client}) { # If $opts{mount} *was* provided, use the value from it as the # client, otherwise create one and note it as a local object for # when we clean up. if ($opts{mount}) { $opts{client} = $opts{mount}->command_interface; } else { $opts{client} = NACL::C::Client->new(name => 'localhost'); $local_client_obj = 1; } } # The volume and mount (if present) options need to be STask objects, but # are allowed to come in as components. Upgrade them to STasks if needed. if (! $opts{volume}->isa('NACL::STask::Volume')) { $opts{volume} = $opts{volume}->cast_component_to_task(); } if ($opts{mount} && ! $opts{mount}->isa('NACL::STask::Client::Mount')) { $opts{mount} = $opts{mount}->cast_component_to_task(); } # Create the basis for the object. We'll use the C::MM-created accessors # to set the values. my $self = bless {}, $class; # Set the client, volume and preserve values: $self->_client($opts{client}); $self->_volume($opts{volume}); $self->_preserve_mount($opts{preserve_mount}); $self->_preserve_files($opts{preserve_files}); $self->_local_client_obj($local_client_obj); # Check to see if this client/volume pair already exists in the map table: my $key = $self->_map_key(); if ($OBJECT_ID_MAP{$key}) { # Can't have two of the same mapping active at once. NATE::Exceptions::Argument->throw( "Error: a $class object already exists for client " . $opts{client}->name . ', volume ' . $opts{volume}->volume ); } # If mount is not passed in, we create it. If we create it, we note this # in the _local_mount_obj property. if ($opts{mount}) { $self->_mount($opts{mount}); $self->_local_mount_obj(0); } else { # This will throw an exception if it fails: my $mount = NACL::STask::Client::Mount->create( command_interface => $opts{client}, volume => $opts{volume} ); $self->_mount($mount); $self->_local_mount_obj(1); } # If we successfully created the full object, register it with the map and # return it: $OBJECT_ID_MAP{$key} = 1; $Log->exit() if $may_exit; return $self; } =head2 file_create # Create a file that compresses from 8 blocks to 3: $compressible->file_create( file => 'from8to3', pattern => 'H V1 V2 V3 T T T T' ); # Create a file that compresses from 6 to 3, with zero'd # blocks in positions 7 and 8. Note that spaces in the # pattern are optional: $compressible->file_create( file => 'from6to3with0s', pattern => 'HV1V2V3TT00' ); # Create several files at once. The size of @files and # @patterns must be equal or an exception is thrown: $compressible->file_create( file => \@files, pattern => \@patterns ); (Instance method only) This method is used to create files on the volume that is encapsulated by this object. A file is created according to a pattern provided by the caller. Multiple files can be created in a single call by passing list references for the C and C options. A file being created should not already exist; if it does, an exception is thrown. To change an existing file, see B, below. The method returns void on success and throws an exception on any error. The pattern controls the data that is written to a file, as well as dictating the size of the file itself. The patterns are meant to convey what the file will look like I compression is applied to the volume or volume snapshot that the file is on. When the file is initially written, it will (usually) be denser than the pattern would imply, as this is the pre-compression data. Patterns are a sequence of characters, optionally separated by spaces. The syntax of the patterns is based on the wiki page for the Compression Group Sharing Test Plan (L). The key characters are: =over =item B This character represents the I
block of a compression group. It should be the first character in a compression-group-sized series (a compression group is 8 blocks in size), and must only appear once per compression group. For example, the following two patterns are both good: H V1 V2 T T T T T H V1 V2 V3 T T T T H V1 T T The first is a full compression group-length pattern. The second spans two groups, and the second B occurs as the first character in the second group. This pattern is bad: H V1 V2 T T T H V1 V2 T T T T T In this pattern, the second B falls within the boundaries of the first compression group. (The first group is only 6 blocks in size, and would need to be padded with holes, see B<0> below.) =item B This character represents a I block in a compression group. It may appear multiple times in a pattern, but all instances must be grouped together at the end of the compression group. It is used to help determine the overall size of the pre-compression file. =item B> This character (or multi-character sequence) represents a non-compressible block of data (usually a random stream of bytes). The additional characters are optional, and are generally used when specifying the pattern to help count the number of non-compressible blocks being included in the pattern. In the wiki page, both numerals and lower-case letters are used for the additional characters. To avoid confusion with other pattern characters, only numerals and I letters are allowed for the additional characters. Also, if a B is followed by numerals, the numerals cannot have a leading C<0>, or this will be interpreted as a "hole" block in the file (see the next item). Any of these specify a single data block: V Va V1 Vabc V16 These are either errors or would be mis-interpreted: VA V+ V0 (The last one is interpreted as two blocks, a data block and a hole.) =item B<0> A B<0> (zero) represents a zero-block, or "hole", in the file. These may appear at the end of a compression group to signify that the group does not occupy all 8 blocks, or they may be interspersed between B-blocks when creating a pattern for a file that does not compress. A B<0> cannot appear between a B character and the last B of the sequence that the B starts. The following pattern is bad: H V1 0 V2 T T T T Even though it specifies exactly 8 blocks, the hole between the two B-blocks is not allowed when defining a compressed data sequence. =back See the wiki page referenced above for a long list of example patterns currently being used. =over =item Options This method recognizes the following options: =over =item C<< file =E $file_name >> The name of the file to create. This should be a relative path. Absolute paths and paths containing C<..> will be rejected. All files are created relative to the mount-point this object has for the client/volume pair. May be a list-reference to specify multiple files. =item C<< pattern =E $sequence >> The pattern to use in creating the file's content. Must conform to the guidelines laid out above. If C is a list-reference then this may also be a list-reference with the same number of elements, to specify patterns for each file. Alternately, if this is just a single value then the one pattern is used for all files. =item C<< offset =E $integer >> (Defaults to 0) The offset within the file (in 4k blocks) to start writing the data given in C. Generally, when creating a file, you don't want to use this. However, there may be cases where you do want to skip ahead before writing the initial data so this option is provided. If there are multiple files given by the C option, it will apply to all of them. Note that the value is in blocks, not bytes. The default is 0. =item C<< delete_on_error =E 0|1 >> (Defaults to 1) If true (1), then an error triggers the deletion of all files specified by the C option. This is to prevent a situation where a request may leave a partially-completed file (or set of files) in place after an error. The default is true (1), set to false (0) to disable this. =back Both C and C may be list-references instead of scalars, to allow for creating multiple files in a single call. If this is used, the list references must have identical numbers of elements in them B C must be exactly one pattern (which is then used for all files). =item Exceptions The following exceptions may be thrown by this method: =over =item B If a bad option is passed, or if the value of C or C is invalid, this exception is thrown. This is also thrown if there is a mis-match between the number of files specified and the number of patterns. It is also thrown if there is a list of file names and the list contains a duplicate name. =item B If the creation of the file fails, this exception is thrown. The file-name will be in the text of the exception, for cases where multiple files are being created. This exception could signal an error in the opening, writing or closing of the file. =back =back =cut ############################################################################### # # Sub Name: file_create # # Description: Create one or more files according to passed-in pattern(s) # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # @opts in list Additional options, converted # to %opts by validate() # # Returns: Success: void # Failure: throws exception # ############################################################################### sub file_create { $Log->enter() if $may_enter; my ($self, @opts) = @_; $self->_verify_invocation(style => 'instance_only'); my %opts = validate( @opts, { file => { type => SCALAR | ARRAYREF, }, pattern => { type => SCALAR | ARRAYREF, }, offset => { type => SCALAR, default => 0, regex => qr/^\d+$/, }, delete_on_error => { type => BOOLEAN, default => 1, }, } ); # Make $files an array-ref regardless of input: my $files = ref($opts{file}) ? $opts{file} : [ $opts{file} ]; # Make $patterns an array-ref as well: my $patterns = ref($opts{pattern}) ? $opts{pattern} : [ $opts{pattern} ]; # Is there an offset to move to, before writing? my $offset = $opts{offset}; # Do we completely clean-up in case of an error? my $delete_on_error = $opts{delete_on_error}; # This will hold the parsed representations of the patterns: my @parsed_patterns; # Make sure neither @{$files} nor @{$patterns} is empty if (! (@{$files} && @{$patterns})) { NATE::Exceptions::Argument->throw( q(One or both of 'file' or 'pattern' is an empty list) ); } # If we have one pattern and multiple files, copy the pattern enough # times to use it for all files if ((@{$files} > 1) && (@{$patterns} == 1)) { @{$patterns} = ($patterns->[0]) x scalar(@{$files}); } # Check that we have equal numbers of files and patterns if (@{$files} != @{$patterns}) { NATE::Exceptions::Argument->throw( q(Count mis-match between 'file' and 'pattern' arguments) ); } # If we have more than one file, check for duplicates if (@{$files} > 1) { my %seen; for my $file (@{$files}) { if ($seen{$file}++) { NATE::Exceptions::Argument->throw( "Filename '$file' is duplicated in list" ); } } } # Check that all the files are valid and not already "known": for my $file (@{$files}) { if ($file =~ m{^/}) { NATE::Exceptions::Argument->throw( "Absolute filename '$file' not allowed" ); } # This pattern disallows any ".." in $file while still allowing # single dots or 3+ dots: if ($file =~ m{^[.][.]/|/[.][.]/|^[.][.]$|/[.][.]$}) { NATE::Exceptions::Argument->throw( "Filename '$file' contains '..', which is not allowed" ); } if ($self->_files_exists($file)) { NATE::Exceptions::Argument->throw( "Filename '$file' already created by this object" ); } } # Pre-parse all the patterns, so that we know of an error before we've # written any files: for my $idx (0 .. $#{$patterns}) { my $parsed = $self->_parse_pattern($patterns->[$idx]); if (ref $parsed) { push(@parsed_patterns, $parsed); } else { NATE::Exceptions::Argument->throw( "Invalid pattern '$patterns->[$idx]' (for file " . "$files->[$idx]): $parsed" ); } } # Process the files, using the parsed versions of the patterns. Keep # track of files that are created so that if an error occurs we can # completely roll back to the state we were in before we started. my @files_done; # To track the completed files in case of error for my $idx (0 .. $#{$files}) { try { $self->_write_to_file( $files->[$idx], $parsed_patterns[$idx], $offset ); push(@files_done, $files->[$idx]); # Use the file size (in blocks) as the value on the internal # hash table. We may need it in file_metadata(). $self->_files_set( $files->[$idx] => $parsed_patterns[$idx]->{total_size} + $offset ); } catch NATE::BaseException with { my $exception = shift; # If they opted for a complete clean-up on error, clear out the # files we've created thus far. Unlink them and remove them from # the internal table. if ($delete_on_error) { my $mntpt = $self->mount_point; for my $file (@files_done) { # Remove the file unlink("$mntpt/$file"); # Remove from the internal table $self->_files_delete($file); } # Just to be sure... unlink("$mntpt/$files->[$idx]"); } # Re-throw $exception so that we exit $exception->throw; }; } $Log->exit() if $may_exit; return; } =head2 file_modify # Modify an existing file: $compressible->file_modify( file => $existing_file, pattern => $new_data_pattern ); # Modify a file at an offset. This modifies the # file 'from8to3' in the previous example by moving # forward 8 blocks and adding another full compression # group of data: $compressible->file_modify( file => 'from8to3', pattern => 'HVVVTTTT', offset => 8 ); (Instance method only) This method modifies an existing file, allowing the user to change the data within it or add new data to the end. The method returns void on success and throws an exception on error. Using this to change a file is preferred over deleting it and re-creating it, as this preserves the inode already assigned to the file. A combination of B and B could result in different inode/block allocations. B uses the same pattern definition that B uses. Note that if the pattern specified to B is shorter in terms of blocks than the original pattern used to create the file, the file will be truncated down to the new length: # Create the file 'test_file' as 8 blocks in size: $compressible->file_create( file => 'test_file', pattern => 'H V V V T T T T' ); # Modify it with a 6-block pattern: $compressible->file_modify( file => 'test_file', pattern => 'H V V T T T' ); # The file is now 6 blocks in size This method cannot be used to create a file, an exception will be thrown if the given file-name is not one already created by B. =over =item Options This method recognizes the following options: =over =item C<< file =E $file_name >> The name of the file to modify. This must be a path that was already created by a previous call to B or an exception is thrown. May be a list-reference to specify multiple files. =item C<< pattern =E $sequence >> The pattern to use in modifying the file's content. Must conform to the guidelines laid out in the documentation for B. If C is a list-reference then this may also be a list-reference with the same number of elements, to specify patterns for each file. Alternately, if this is just a single value then the one pattern is used for all files. =item C<< offset =E $integer >> (Defaults to 0) The offset within the file (in 4k blocks) to start writing the data given in C. If there are multiple files given by the C option, it will apply to all of them. Note that the offset is in blocks, not bytes. =back Both C and C may be list-references instead of scalars, to allow for modifying multiple files in a single call. If this is used, the list references must have identical numbers of elements in them B C must be exactly one pattern (which is then used for all files). =item Exceptions The following exceptions may be thrown by this method: =over =item B If a bad option is passed, or if the value of C or C is invalid, this exception is thrown. This is also thrown if there is a mis-match between the number of files specified and the number of patterns. It is also thrown if there is a list of file names and the list contains a duplicate name. =item B If the modification of the file fails, this exception is thrown. The file-name will be in the text of the exception, for cases where multiple files are being modified. This exception could signal an error in the opening, writing or closing of the file. =back =back =cut ############################################################################### # # Sub Name: file_modify # # Description: Do an in-place modify of a file previously created by this # object. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # @opts in list Additional options, converted # to %opts by validate() # # Returns: Success: void # Failure: throws exception # ############################################################################### sub file_modify { $Log->enter() if $may_enter; my ($self, @opts) = @_; $self->_verify_invocation(style => 'instance_only'); my %opts = validate( @opts, { file => { type => SCALAR | ARRAYREF }, pattern => { type => SCALAR | ARRAYREF }, offset => { type => SCALAR, default => 0, regex => qr/^\d+$/, }, } ); # Make $files an array-ref regardless of input: my $files = ref($opts{file}) ? $opts{file} : [ $opts{file} ]; # Make $patterns an array-ref as well: my $patterns = ref($opts{pattern}) ? $opts{pattern} : [ $opts{pattern} ]; # Is there an offset to move to, before writing? my $offset = $opts{offset}; # This will hold the parsed representations of the patterns: my @parsed_patterns; # Make sure neither @{$files} nor @{$patterns} is empty if (! (@{$files} && @{$patterns})) { NATE::Exceptions::Argument->throw( q(One or both of 'file' or 'pattern' is an empty list) ); } # If we have one pattern and multiple files, copy the pattern enough # times to use it for all files if ((@{$files} > 1) && (@{$patterns} == 1)) { @{$patterns} = ($patterns->[0]) x scalar(@{$files}); } # Check that we have equal numbers of files and patterns if (@{$files} != @{$patterns}) { NATE::Exceptions::Argument->throw( q(Count mis-match between 'file' and 'pattern' arguments) ); } # If we have more than one file, check for duplicates if (@{$files} > 1) { my %seen; for my $file (@{$files}) { if ($seen{$file}++) { NATE::Exceptions::Argument->throw( "Filename '$file' is duplicated in list" ); } } } # Check that all the files are already "known": for my $file (@{$files}) { if (! $self->_files_exists($file)) { NATE::Exceptions::Argument->throw( "Filename '$file' was not created by this object" ); } } # Pre-parse all the patterns, so that we know of an error before we've # written any files: for my $idx (0 .. $#{$patterns}) { my $parsed = $self->_parse_pattern($patterns->[$idx]); if (ref $parsed) { push(@parsed_patterns, $parsed); } else { NATE::Exceptions::Argument->throw( "Invalid pattern '$patterns->[$idx]' (for file " . "$files->[$idx]): $parsed" ); } } # Process the files, using the parsed versions of the patterns. Note that # unlike in file_create() above, here we don't have to catch and re-throw # the exception, since there's no clean-up action to take. We just let any # exception bubble up to the caller. for my $idx (0 .. $#{$files}) { $self->_write_to_file($files->[$idx], $parsed_patterns[$idx], $offset); # In case the size changed, re-assign it here: $self->_files_set( $files->[$idx] => $parsed_patterns[$idx]->{total_size} + $offset ); } $Log->exit() if $may_exit; return; } =head2 file_delete $compressible->file_delete(file => $filename); (Instance method only) Delete a file that has been created by this object. This will only delete files that have been created by this object. If the file is not known to this object an exception will be thrown. If the deletion fails, an exception is thrown. If the deletion succeeds the method returns void. =over =item Options =over =item C<< file =E $filename >> The name of the file to delete. Must exactly match a file name already created by the object. May be a list-reference of multiple files to delete. =back Like B, the C option may take a list-reference of one or more file-names to delete. However, if just one file in the list is not known to the object, none will be deleted (an exception is thrown immediately). =item Exceptions The following exceptions may be thrown by this method: =over =item B If a bad option is passed, or if the value of C is invalid, this exception is thrown. This exception will also be thrown if C is a list-reference and there are duplicate names in the list. =item B If the deletion of the file fails, this exception is thrown. The file-name will be in the text of the exception, for cases where multiple files are being deleted. =back =back =cut ############################################################################### # # Sub Name: file_delete # # Description: Delete one or more files as specified by the caller. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # @opts in list Additional options, converted # to %opts by validate() # # Returns: Success: void # Failure: throws exception # ############################################################################### sub file_delete { $Log->enter() if $may_enter; my ($self, @opts) = @_; $self->_verify_invocation(style => 'instance_only'); my %opts = validate( @opts, { file => { type => SCALAR | ARRAYREF }, } ); # Make $files an array-ref regardless of input: my $files = ref($opts{file}) ? $opts{file} : [ $opts{file} ]; # Check that all the files are "known", and check for duplicates: my %seen; for my $file (@{$files}) { if (! $self->_files_exists($file)) { NATE::Exceptions::Argument->throw( "Filename '$file' was not created by this object, cannot " . 'delete' ); } if ($seen{$file}++) { NATE::Exceptions::Argument->throw( "Filename '$file' is duplicated in list" ); } } # Actually do the deleting: my $mntpt = $self->mount_point; for my $file (@{$files}) { if (! unlink("$mntpt/$file")) { NATE::BaseException->throw( "Error unlinking $file: $!" ); } else { $self->_files_delete($file); } } $Log->exit() if $may_exit; return; } =head2 file_metadata # Get the meta-data associated with the file $file that # was created by this object: $metadata = $compressible->file_metadata(file => $file); # Get metadata, but get 8 blocks even if the file is shorter # or longer: $metadata = $compressible->file_metadata( file => $file, size => 8 ); (Instance method only) This method uses C to get the meta-data about a specific file. It will only fetch data on files that were created by this object with B. The command run is: volume explore -format indir -find 'level=1,fbn>=0,fbn<8' The C<-scope> argument is also passed to the command, to limit it to the file being examined. The return value is a list-reference of one or more hash references. Each hash reference is one block of data from the C command. Each reference contains the following six keys: parent entry level fbn pvbn vvbn These are the columns of data returned by the command; see documentation for C for their meaning. =over =item Options =over =item C<< file =E $filename >> The name of the file to retrieve meta-data for. Must exactly match a file name already created by the object. =item C<< size =E $int >> (Optional) The number of blocks to retrieve meta-data for. The default is the size of the file according to the pattern it was originally created with, by B (or subsequently modified to, by B). =back =item Exceptions The following exceptions may be thrown by this method: =over =item B If the value of C is not known to this object, or if the value of C is not a valid integer, this exception will be thrown. =item B If the C command fails, or if it returns no information, this exception will be thrown. =back =back =cut ############################################################################### # # Sub Name: file_metadata # # Description: Use "volume explore" through the APISet to get the # metadata for the specified file. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # @opts in list Additional options, converted # to %opts by validate() # # Returns: Success: listref # Failure: throws exception # ############################################################################### sub file_metadata { $Log->enter() if $may_enter; my ($self, @opts) = @_; $self->_verify_invocation(style => 'instance_only'); my %opts = validate( @opts, { file => { type => SCALAR }, size => { type => SCALAR, optional => 1, regex => qr/^\d+$/, }, } ); if (! $self->_files_exists($opts{file})) { NATE::Exceptions::Argument->throw( "File '$opts{file}' not known to this object" ); } my $size = $opts{size} || $self->_files_index($opts{file}); # I am using the APISet here rather than NACL::C::Volume->explore # because the construction of the NACL::CS::VolumeExplore objects # drops 2 of the 6 fields that this command returns. my $apiset = $self->_volume->command_interface->apiset; my $reply = $apiset->volume_explore( scope => $self->volume->volume . "/$opts{file}", format => 'indir', find => qq('level=1,fbn>=0,fbn<$size'), dump => 'all' ); my @explore = @{$reply->get_parsed_output}; if (! @explore) { NATE::BaseException->throw( "'volume explore' over '$opts{file}' returned no data" ); } $Log->exit() if $may_exit; return \@explore; } =head2 file_verify $valid = $compressible->file_verify( file => $file, pattern => $pattern, compressed => 1 ); if (! $valid) { die "File $file content does not match pattern"; } (Instance method only) Verify that the content of the file, as viewed by the metadata for the file, matches the pattern specified. Returns true (1) if the content is valid, or false (0) if it is not. If an error occurs during processing an exception is thrown. =over =item Options =over =item C<< file =E $filename >> The file to be verified. Must be a file that was created by this object using B. =item C<< pattern =E $sequence >> The pattern used when this file was created or most-recently modified by B. Follows the same syntax as B and C. =item C<< offset =E $integer >> The offset, if any, used when the file was created or modified. This has the effect of prepending a corresponding number of C<0> markers to the value of C. =item C<< compressed =E $boolean >> If true, assume the file has had compression applied to it by the Storage Efficiency system. This affects how C is interpreted. =back Unlike the B and B methods, the values of C and C cannot be list-references in this case. Only one file may be verified per call to this method. =item Exceptions The following exceptions may be thrown in cases of error: =over =item B This may be thrown if any of the arguments to the method are invalid, or if the file specified by C is not known to this object (was not created by this object). =item B This may be thrown if the fetching of metadata fails to return any data. =back =back Note that unlike B and B, the C and C options do not allow multiple items to be passed. =cut ############################################################################### # # Sub Name: file_verify # # Description: Verify the metadata of the given file against what is # implied by the pattern and whether compression and/or # deduping has been applied. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # @opts in list Additional options, converted # to %opts by validate() # # Returns: Success: 1 (true) # Failure: 0 (false) or throws exception # ############################################################################### sub file_verify { $Log->enter() if $may_enter; my ($self, @opts) = @_; $self->_verify_invocation(style => 'instance_only'); my %opts = validate( @opts, { file => { type => SCALAR }, pattern => { type => SCALAR }, offset => { type => SCALAR, default => 0, regex => qr/^\d+$/, }, compressed => { type => BOOLEAN, default => 0, }, } ); my $verified = 1; my $compressed = $opts{compressed}; # As a side-effect, this will check that $opts{file} is known to this # object and throw the exception for us if it isn't. my $metadata = $self->file_metadata(file => $opts{file}); my @pattern; if ($compressed) { # If we are dealing with compressed data, then we rely on # $opts{pattern} more directly. Each character in the normalized # form directly corresponds to a block in $metadata. @pattern = $self->_normalize_pattern($opts{pattern}); } else { # If the data is not compressed, we'll need the parsed representation # of the pattern so that we know which blocks are data and which are # holes. Anything not a hole will have a non-zero pvbn. my $parsed = $self->_parse_pattern($opts{pattern}); if (! ref($parsed)) { # An error occurred parsing the pattern: NATE::Exceptions::Argument->throw( "Error parsing '$opts{pattern}': $parsed" ); } @pattern = @{$parsed->{blocks}}; } if ($opts{offset}) { # Prepend some "0" characters to @pattern to represent offset: unshift(@pattern, ('0') x $opts{offset}); } # Now verify. This is fairly simplistic, but we're not trying to stress- # test the compression engine here or anything, just trying to make sure # that the files create by file_create/file_modify ended up the way they # should have on disk. # First, make sure that we have the same number of elements in @pattern # as in @{$metadata}. If we don't, we have bad input and throw an # exception. if (@pattern != @{$metadata}) { NATE::Exceptions::Argument->throw( "Pattern '$opts{pattern}' does not match the size of $opts{file}" ); } # This should be pretty straight-forward: for my $index (0 .. $#pattern) { my $elem = $pattern[$index]; my $data = $metadata->[$index]; if (($elem eq '0') or ($elem eq 'hole')) { # This should be a hole, which means 0 in pvbn and vvbn if ($data->{pvbn} or $data->{vvbn}) { $verified = 0; } } elsif ($elem eq 'H') { # This should be a compression header if (($data->{pvbn} ne '(comp-hd)') or ($data->{vvbn} ne '(comp-lzo)')) { $verified = 0; } } elsif ($elem eq 'T') { # This should be a compression trailer if (($data->{pvbn} ne '(comp-tr)') or ($data->{vvbn} ne '(comp-tr)')) { $verified = 0; } } elsif (($elem eq 'data') or ($elem eq 'dataend') or ($elem eq 'V') or ($elem eq 'zero')) { # This should be a plain data block if (($data->{pvbn} !~ /^\d+$/) or ($data->{vvbn} !~ /^\d+$/)) { $verified = 0; } } else { # Since _parse_pattern would have validated content, an unknown # $elem can only have come in from the raw $opts{pattern} NATE::Exceptions::Argument->throw( "Unknown character '$elem' in pattern '$opts{pattern}'" ); } } $Log->exit() if $may_exit; return $verified; } ############################################################################### # # Sub Name: _map_key # # Description: Generate a "key" for this object for use in %OBJECT_ID_MAP # that uniquely identifies this object. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in scalar Object of this class # # Returns: key # ############################################################################### sub _map_key { $Log->enter() if $may_enter; my ($self) = @_; $Log->exit() if $may_exit; return join(q{:} => $self->_client->name, $self->_volume->volume); } ############################################################################### # # Sub Name: _parse_pattern # # Description: Parse a pattern into a hash structure that will be used # by _write_to_file to control the content being written. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # $pattern in scalar Pattern to parse # # Returns: Success: hashref # Failure: error message # ############################################################################### sub _parse_pattern { $Log->enter() if $may_enter; my ($self, $pattern) = @_; my (@pattern, @blocks, $in_comp_group, $position, $group_data, $group_size, $total_data); my $data = {}; # Normalize the pattern into a list of characters. Factored this out so # that file_verify() can use it as well. @pattern = $self->_normalize_pattern($pattern); # Use this as an end-marker for the parsing stage below: push(@pattern, 'end'); # Process the pattern. If I had more time (and patience) I'd probably # write this up as a finite-state machine. But I don't, so I'm hacking # it for now... $total_data = 0; $position = 0; while ($position <= $#pattern) { my $char = $pattern[$position]; if ($char eq 'H') { if ($in_comp_group) { # If we see a 'H' character while already parsing a # compression group, that's an error: return 'Compression header block inside compression group'; } else { # Otherwise, it's the start of a compression group. First of # all, it has to start on a multiple of 8: if ($position % 8) { return 'Compression header block not on 8-block boundary'; } # I think that's the only requirement, so set things up: $in_comp_group = 1; $group_data = 0; $group_size = 0; $position++; } } elsif ($char eq 'V') { if ($in_comp_group) { # We're inside a compression group, so all the V-blocks should # be clumped together. Consume them all. $group_data = 1; $total_data++; $position++; while ($pattern[$position] eq 'V') { $group_data++; $total_data++; $position++; } } else { # Not in a group, so this is a stand-alone block: push(@blocks, 'data'); $total_data++; $position++; } } elsif ($char eq 'T') { if ($in_comp_group) { # The start of the trailer blocks signals the end of the # compression group. Consume them all and use that to help # determine the overall size of the compression group. $group_size = $group_data + 2; # 1 for the H, one for first T $position++; while ($pattern[$position] eq 'T') { $group_size++; $position++; } # We've consumed all the T characters, now $group_size should # be the overall size in blocks of this compression group, and # $group_data should be the number that are data blocks. push(@blocks, ('zero') x ($group_size - $group_data)); push(@blocks, ('data') x ($group_data - 1)); push(@blocks, 'dataend'); $in_comp_group = 0; } else { # A 'T' outside a compression group is an error: return 'Trailer block encountered outside compression group'; } } elsif ($char eq '0') { if ($in_comp_group) { return 'Hole block encountered inside compression group'; } push(@blocks, 'hole'); $position++; } elsif ($char eq 'end') { # This is the end of the pattern. Make sure our internal state is # sane. Then set up $data for the return. if ($in_comp_group) { return 'End-of-pattern reached inside compression group'; } $data->{blocks} = \@blocks; $data->{total_data} = $total_data; $data->{total_size} = scalar(@blocks); $position++; # This should trigger the end of the while-loop } else { return "Unknown character '$char'"; } } $Log->exit() if $may_exit; return $data; } ############################################################################### # # Sub Name: _normalize_pattern # # Description: Convert a pattern string to a list of pattern characters # by removing all unused characters and splitting. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # $pattern in scalar Pattern string to convert # # Returns: list # ############################################################################### sub _normalize_pattern { $Log->enter() if $may_enter; my ($self, $pattern) = @_; # Start by removing all optional characters from $pattern: spaces and # any lower-case letters and numerals that immediately follow a "V". $pattern =~ s/V[a-z]+|V[1-9]\d*/V/g; $pattern =~ s/\s+//g; $Log->exit() if $may_exit; return split(// => $pattern); } ############################################################################### # # Sub Name: _write_to_file # # Description: Write a pattern of data to the given file. $pattern is # not the raw text given by the user, but rather the # parsed result from _parse_pattern(). # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in ref Object of this class # $file in scalar File name to write to # $pattern in hashref Parsed pattern representation # $offset in scalar If given, the file offset to # move to before writing # # Globals: $ZEROBUF # # Returns: Success: void # Failure: throws exception # ############################################################################### sub _write_to_file { $Log->enter() if $may_enter; my ($self, $file, $pattern, $offset) = @_; my ($filename, $databuf, $fh, $page, $flags); $offset ||= 0; # Avoid warnings about undef # The full filename we're opening and writing to: $filename = $self->mount_point . "/$file"; # Create enough random data for use in the file we are going to write: $databuf = q{}; for (1 .. ($pattern->{total_data} * 4096)) { $databuf .= chr(rand(256)); } # The flags for sysopen(), based on whether $filename already exists: $flags = O_WRONLY | O_TRUNC; if (! -f $filename) { $flags |= O_CREAT; } # The "page number" into $databuf to use for data-writes $page = 0; # Open the file: if (! sysopen($fh, $filename, $flags)) { NATE::BaseException->throw("Error opening file $filename: $!"); } # If there was an offset given, move the file ptr ahead by that many # blocks: if ($offset) { sysseek($fh, $offset*4096, SEEK_SET); } for my $block (@{$pattern->{blocks}}) { if ($block eq 'hole') { # For a hole, write nothing. Just advance the pointer by one block. if (! sysseek($fh, 4096, SEEK_CUR)) { NATE::BaseException->throw("Error seeking on $filename: $!"); } } elsif ($block eq 'zero') { # Write the $ZEROBUF string to this block. if (! syswrite($fh, $ZEROBUF)) { NATE::BaseException->throw( "Error writing zero-data to $filename: $!" ); } } elsif ($block eq 'data') { # Write the next "page" of data from $databuf. if (! syswrite($fh, $databuf, 4096, $page*4096)) { NATE::BaseException->throw( "Error writing data to $filename: $!" ); } $page++; } elsif ($block eq 'dataend') { # This write only does 3584 bytes, to leave room for the # compression encoding for the leading zeros. if (! syswrite($fh, $databuf, 3584, $page*4096)) { NATE::BaseException->throw( "Error writing end-data to $filename: $!" ); } $page++; } } # Do a truncate to make sure the file is the proper size: truncate($fh, ($pattern->{total_size} + $offset)*4096); # We're finished, close the file: if (! close($fh)) { NATE::BaseException->throw("Error closing file $filename: $!"); } $Log->exit() if $may_exit; return; } ############################################################################### # # Sub Name: DESTROY # # Description: Destructor. Possible delete the files this object created, # possibly delete mount-point (if created locally). Free up # the client object if created locally. # # Arguments: NAME IN/OUT TYPE DESCRIPTION # $self in scalar Object of this class # # Returns: Success: void # Failure: throws exception # ############################################################################### sub DESTROY { $Log->enter() if $may_enter; my ($self) = @_; # Remove the key for this object from %OBJECT_ID_MAP: delete $OBJECT_ID_MAP{$self->_map_key}; # Unless the user specifically asked this object to retain all locally- # created resources, purge anything we created. # All files created by this object if (! $self->_preserve_files) { # Don't want an exception from file_delete() to stop us from finishing # destruction... try { $self->file_delete(file => [ $self->_files_keys ]); } catch NATE::BaseException with { my $exception = shift; my $type = ref($exception); $Log->warn( "DESTROY caught $type exception while deleting files: " . $exception->text ); }; } # The mount-point itself if (! $self->_preserve_mount) { # Don't purge the mount if we didn't create it... if ($self->_local_mount_obj) { try { $self->_mount->purge; } catch NATE::BaseException with { my $exception = shift; my $type = ref($exception); $Log->warn( "DESTROY caught $type exception while purging mount: " . $exception->text ); }; # Free resources and cause its destructor (if any) to be called $self->_mount_clear; } } # I *think* that this is overkill, but just in case of circular references # explicitly undef the client object if we created it, so as to free # connection resources: if ($self->_local_client_obj) { $self->_client_clear; } $Log->exit() if $may_exit; return; } 1;