#!/usr/bin/perl -w

package File::Grep;

use strict;
use Carp;

# Package globals: the module version plus the Exporter configuration.
# Declared at file scope so they remain usable, unqualified, anywhere in
# this package.
our ( $VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS );

BEGIN {
  require Exporter;

  # Build "major.minor" out of the revision keyword string.
  my ( $major, $minor ) = q( $Revision: 0.02 $ ) =~ /\s(\d+)\.(\d+)/;
  $VERSION = sprintf( "%d.%02d", $major, $minor );

  @ISA         = ( 'Exporter' );
  @EXPORT      = ();                          # nothing exported by default
  @EXPORT_OK   = ( 'fgrep', 'fmap', 'fdo' );  # available on request
  %EXPORT_TAGS = ();
}

# When true (the default), problems opening or reading a file are not
# reported.  Set to a false value to have them carp()ed instead.
our $SILENT = 1;

# Internal function; does the actual walk through the files, and calls
# out to the coderef to do the work for each line.  This gives a bit
# more flexibility with the end interface.
#
# Arguments:
#   $closure - code reference invoked once per line as
#              $closure->( $index, $lineno, $line ), where $index is the
#              position of the current entry in @files, $lineno is $. and
#              $line is the line exactly as read.
#   @files   - plain scalars are taken to be file names and are opened for
#              reading; anything else is used as a filehandle as-is.
#
# The closure's return value steers the walk:
#   negative - stop everything
#   zero     - stop reading the current file and move on to the next one
#              (any false value counts as zero)
#   positive - carry on with the next line
#
# An entry that cannot be opened is skipped.  An entry that cannot be read
# from at all ends the whole walk.  Either problem is carp()ed unless
# $SILENT is true.  Returns nothing.

sub _fgrep_process {
  my ( $closure, @files ) = @_;

  # Iterate by index so that the index handed to the closure always equals
  # the entry's position in @files, even when earlier entries were skipped.
  ENTRY:
  for my $i ( 0 .. $#files ) {
    my $file = $files[$i];
    my $fh;
    my $opened_here = 0;

    if ( ref( \$file ) eq 'SCALAR' ) {
      # A plain scalar: assume it's a file name and open it.  The lexical
      # handle keeps nested calls from trampling each other, and the
      # three-argument form stops characters such as '|' or '>' in the
      # name from being interpreted as an open mode.  The name is
      # stringified so that undef becomes '' (which simply fails to open)
      # instead of triggering open's undef-means-temporary-file case.
      unless ( open( $fh, '<', "$file" ) ) {
        carp "Cannot open file '$file' for fgrep: $!" unless $SILENT;
        next ENTRY;
      }
      $opened_here = 1;
    } else {
      # Otherwise, we will assume it's a legit filehandle.  If something's
      # amiss, we'll catch it at the first read below.
      $fh = $file;
    }

    # The first read is guarded because reading from something that is
    # not a filehandle dies rather than returning undef.
    my $line;
    unless ( eval { $line = <$fh>; 1 } ) {
      carp "Cannot use file '$file' for fgrep: $@" unless $SILENT;
      last ENTRY;
    }

    my $abort = 0;
    while ( defined $line ) {
      # Normalise any false return to 0 so the numeric tests stay quiet.
      my $state = $closure->( $i, $., $line ) || 0;
      if ( $state < 0 ) {
        # Need to shut down the whole process...
        $abort = 1;
        last;
      }
      # Need to shut down just this file...
      last if $state == 0;
      $line = <$fh>;
    }

    # Only close what we opened; caller-supplied handles are left alone.
    close $fh if $opened_here;
    last ENTRY if $abort;
  }
  return;
}

# fgrep BLOCK LIST
#
# Greps the lines of every file/handle in LIST with BLOCK.  BLOCK is called
# with ( index of the entry in LIST, line number, line ) and with $_
# localized to a copy of the line.  What comes back depends on the
# calling context:
#   list   - one hash reference per LIST entry with the keys
#            filename (the entry itself), count (number of matching lines)
#            and matches (hash of line number => original line text)
#   scalar - the total number of matching lines
#   void   - scanning stops at the first match; true/false is returned
sub fgrep (&@) {
  my ( $coderef, @files ) = @_;
  my $context = wantarray;

  # Void context: only existence matters, so bail out on the first hit.
  if ( !defined $context ) {
    my $found = 0;
    _fgrep_process(
      sub {
        my ( $index, $lineno, $text ) = @_;
        local $_ = $text;
        return 1 unless $coderef->( $index, $lineno, $_ );
        $found = 1;
        return -1;    # negative tells the walker to stop everything
      },
      @files
    );
    return $found;
  }

  # Scalar context: just tally the hits.
  if ( !$context ) {
    my $count = 0;
    _fgrep_process(
      sub {
        my ( $index, $lineno, $text ) = @_;
        local $_ = $text;
        $count++ if $coderef->( $index, $lineno, $_ );
        return 1;
      },
      @files
    );
    return $count;
  }

  # List context: keep a record per entry, filled in as lines match.
  my @matches;
  foreach my $entry ( @files ) {
    push @matches, { filename => $entry, count => 0, matches => {} };
  }
  _fgrep_process(
    sub {
      my ( $index, $lineno, $text ) = @_;
      local $_ = $text;
      if ( $coderef->( $index, $lineno, $_ ) ) {
        my $record = $matches[$index];
        $record->{ count }++;
        # Store the line as read, not whatever BLOCK left in $_.
        $record->{ matches }->{ $lineno } = $text;
      }
      return 1;
    },
    @files
  );
  return @matches;
}

# fgrep_flat BLOCK LIST
#
# Greps the lines of every file/handle in LIST with BLOCK and returns the
# matching lines as one flat list, in the order they were read.  BLOCK is
# called with ( index of the entry in LIST, line number, line ) and with
# $_ localized to a copy of the line; the line as read is what gets
# returned, regardless of any change BLOCK makes to $_.
sub fgrep_flat (&@) {
  my ( $coderef, @files ) = @_;
  my @matches;
  my $sub = sub {
    my ( $file, $pos, $line ) = @_;
    local $_ = $line;
    push @matches, $line if $coderef->( $file, $pos, $_ );
    # Always ask for the next line.  Falling off the end of this sub after
    # a failed test would hand back a false value, which the walker reads
    # as "abandon this file", cutting the scan short at the first
    # non-matching line.
    return 1;
  };
  _fgrep_process( $sub, @files );
  return @matches;
}

# fgrep_into BLOCK ARRAYREF, LIST
#
# Greps the lines of every file/handle in LIST with BLOCK and appends each
# matching line to the array behind ARRAYREF, which is also the return
# value.  BLOCK is called with ( index of the entry in LIST, line number,
# line ) and with $_ localized to a copy of the line; the line as read is
# what gets appended, regardless of any change BLOCK makes to $_.
sub fgrep_into ( &$@ ) {
  my ( $coderef, $arrayref, @files ) = @_;
  my $sub = sub {
    my ( $file, $pos, $line ) = @_;
    local $_ = $line;
    push @$arrayref, $line if $coderef->( $file, $pos, $_ );
    # Always ask for the next line.  Falling off the end of this sub after
    # a failed test would hand back a false value, which the walker reads
    # as "abandon this file", cutting the scan short at the first
    # non-matching line.
    return 1;
  };
  _fgrep_process( $sub, @files );
  return $arrayref;
}

# fmap BLOCK LIST
#
# Runs BLOCK, in list context, once for every line of every file/handle in
# LIST and returns everything BLOCK produced, in order.  BLOCK is called
# with ( index of the entry in LIST, line number, line ) and with $_
# localized to a copy of the line.
sub fmap (&@) {
  my ( $mapper, @files ) = @_;

  my @results;
  _fgrep_process(
    sub {
      my ( $index, $lineno, $text ) = @_;
      local $_ = $text;
      my @produced = $mapper->( $index, $lineno, $_ );
      push @results, @produced;
      return 1;    # always continue with the next line
    },
    @files
  );
  return @results;
}

# fdo BLOCK LIST
#
# Runs BLOCK once for every line of every file/handle in LIST, purely for
# its side effects.  BLOCK is called with ( index of the entry in LIST,
# line number, line ) and with $_ localized to a copy of the line; its
# result is discarded.  Whatever the line walker returns is passed back.
sub fdo (&@) {
  my ( $doer, @files ) = @_;

  my $per_line = sub {
    my ( $index, $lineno, $text ) = @_;
    local $_ = $text;
    $doer->( $index, $lineno, $_ );
    return 1;    # always continue with the next line
  };

  return _fgrep_process( $per_line, @files );
}

1;
__END__

=head1 NAME

File::Grep - Find matches to a pattern in a series of files and related
             functions

=head1 SYNOPSIS

  use File::Grep qw( fgrep fmap fdo );
  
  # Boolean (scalar) context: true if any line matches
  if ( fgrep { /$user/ } "/etc/passwd" ) { do_something(); }

  # Scalar context
  print "The index page was hit ",
	( fgrep { /index\.html/ } glob "/var/log/httpd/access.log.*"),
	" times\n";

  # List context: one hash reference per file
  my @matches = fgrep { /index\.html/ } glob "/var/log/httpd/access.log.*";
  print SUMMARY "$_->{filename}: $_->{count}\n" foreach @matches;

  # Mapping
  my @lower = fmap { chomp; lc; } glob "/var/log/httpd/access.log.*";

  # Foreach style..
  my $count;
  fdo { $count++ } @filelist;
  print "Total lines: $count\n";
 
  # More complex handling
  my @matchcount;
  fdo { my ( $file, $pos, $line ) = @_;
        $matchcount[$file]++ if ( $line =~ /keyword/ );
      } @filelist;


=head1 DESCRIPTION

File::Grep mimics the functionality of perl's built-in grep function, but
applies it to the lines of files instead of to a list.  This is similar in
nature to the UNIX grep command, but more powerful, as the pattern can be
any legal perl code.

The main functions provided by this module are:

=over

=item fgrep BLOCK LIST

Performs a grep operation on the files in LIST, using BLOCK as the
criterion for accepting a line or not.  In list context, one hash
reference is returned for each entry in LIST, holding the entry itself
(C<filename>), the number of matching lines (C<count>), and the matching
lines keyed by line number (C<matches>).  In scalar context, only the
total number of matches is returned.  In void context, the function
stops at the first match and returns true if one was found, false
otherwise.

When entering BLOCK, the $_ variable will be localized to the current
line.  In addition, you will be given the position in LIST of the current
file, the line number in that file, and the line itself as arguments 
to this function.  While you can change $_ if necessary, only the 
original value of the line will be added to the returned list.  If you
need to get the modified value, use fmap (described below).

The LIST can contain either scalars or filehandles (or filehandle-like
objects).  If an item is a plain scalar, it is taken to be a filename and
an attempt is made to open it and read it as normal.  Otherwise it is
treated as a filehandle.  Any errors resulting from IO may be reported to
STDERR by setting the class variable $File::Grep::SILENT to false;
otherwise, no error indication is given.

=item fmap BLOCK LIST

Performs a map operation on the files in LIST, using BLOCK as the
mapping function.  The results from BLOCK will be appended to the 
list that is returned at the end of the call.

=item fdo BLOCK LIST

Performs the equivalent of a foreach operation on the files in LIST,
performing BLOCK for each line in each file.  This function has no
return value.  If you need to specialize more than what fgrep or fmap
offer, you can use this function.

=back

In addition, if you need additional fine control, you can use the internal
function _fgrep_process.  It is not exported and takes a code reference
followed by LIST, as in "File::Grep::_fgrep_process( sub { ... }, LIST )".
The code reference receives the same three arguments as BLOCK (although
$_ is not set to the current line), and its return value controls when
the function 'short circuits'.  If, after processing a line, the code
reference returns a negative number, the entire process is aborted,
closing any open filehandles that were opened by the function.  If the
return value is 0, the current file is aborted, closed if opened by the
function, and the next file is then searched.  A positive return value
will simply go on to the next line as appropriate.

=head1 EXPORT

"fgrep", "fmap", and "fdo" may be exported, but these are not set by default.

=head1 AUTHOR

Michael K. Neylon, E<lt>mneylon-pm@masemware.comE<gt>

=head1 SEE ALSO

L<perl>.

=cut