#!/usr/bin/env perl
##
## greple: extensible grep with lexical expression and region handling
##
## Since Mar 29 1991
##

use v5.14;
use warnings;

use File::stat;
use IO::Handle;
use Pod::Usage;
use Text::ParseWords qw(shellwords);
use List::Util qw(min max first sum uniq shuffle notall pairs pairmap);
use Hash::Util qw(lock_keys);
use Cwd qw(getcwd abs_path);
use Carp;

use utf8;
use Encode;
use Encode::Guess;
use open IO => ':utf8', ':std';

use Data::Dumper;
{
    ## Force the pure-Perl Dumper implementation and replace its
    ## qquote() with a version that only wraps the string in double
    ## quotes, without escaping anything.
    no warnings 'redefine';
    $Data::Dumper::Useperl = 1;
    *Data::Dumper::qquote = sub {
	my $string = shift;
	return qq["$string"];
    };
}

##
## Setup greple/lib to be a module directory if exists.
##
BEGIN {
    ## Replace the trailing "/NAME", "/script/NAME" or "/bin/NAME" of
    ## this script's absolute path by "/lib", and put the result at the
    ## head of @INC when it holds an App/Greple directory.
    my $lib = abs_path($0) =~ s{/(?:script/|bin/)?\w+$}{/lib}r;
    unshift @INC, $lib if $lib and -d "$lib/App/Greple";
}

use Getopt::EX::Loader;
use Getopt::EX::Func qw(parse_func callable);

use App::Greple;
use App::Greple::Common;
use App::Greple::Util;
use App::Greple::Grep;
use App::Greple::Regions;
use App::Greple::Pattern;
use App::Greple::Pattern::Holder;
use App::Greple::Filter;

my $version = $App::Greple::VERSION;

=encoding utf8

=head1 NAME


greple - extensible grep with lexical expression and region control


=head1 VERSION


Version 9.1505


=head1 SYNOPSIS


B<greple> [B<-M>I<module>] [ B<-options> ] pattern [ file... ]

  PATTERN
    pattern              'and +must -not ?optional &function'
    -x, --le   pattern   lexical expression (same as bare pattern)
    -e, --and  pattern   pattern match across line boundary
    -r, --must pattern   pattern cannot be compromised
    -t, --may  pattern   pattern may exist
    -v, --not  pattern   pattern not to be matched
    -E, --re   pattern   regular expression
        --fe   pattern   fixed expression
    -f, --file file      file contains search pattern
    --select index       select indexed pattern from -f file
  MATCH
    -i, --ignore-case    ignore case
    -G, --capture-group  match capture groups rather than whole pattern
    -S, --stretch        stretch matched area to the enclosing block
    --need=[+-]n         required positive match count
    --allow=[+-]n        acceptable negative match count
    --matchcount=n[,m]   required match count for each block
  STYLE
    -l                   list filename only
    -c                   print count of matched block only
    -n                   print line number
    -H, -h               do or do not display filenames
    -o                   print only the matching part
    --all                print entire data
    -m, --max=n[,m]      max count of blocks to be shown
    -A,-B,-C [n]         after/before/both match context
    --join               delete newline in the matched part
    --joinby=string      replace newline in the matched text by string
    --nonewline          do not add newline character at block end
    --filestyle=style    how filename printed (once, separate, line)
    --linestyle=style    how line number printed (separate, line)
    --separate           set filestyle and linestyle both "separate"
    --format LABEL=...   define line number and file name format
    --frame-top          top frame
    --frame-middle       middle frame
    --frame-bottom       bottom frame
  FILE
    --glob=glob          glob target files
    --chdir=dir          change directory before search
    --readlist           get filenames from stdin
  COLOR
    --color=when         use terminal color (auto, always, never)
    --nocolor            same as --color=never
    --colormap=color     R, G, B, C, M, Y etc.
    --colorsub=...       shortcut for --colormap="sub{...}"
    --colorful           use default multiple colors
    --colorindex=flags   color index method: Ascend/Descend/Block/Random/Group
    --ansicolor=s        ANSI color 16, 256 or 24bit
    --[no]256            same as --ansicolor 256 or 16
    --regioncolor        use different color for inside/outside regions
    --uniqcolor          use different color for unique string
    --uniqsub=func       preprocess function before check uniqueness
    --random             use random color each time
    --face               set/unset visual effects
  BLOCK
    -p, --paragraph      paragraph mode
    --border=pattern     border pattern
    --block=pattern      block of records
    --blockend=s         block end mark (Default: "--")
    --join-blocks        join back-to-back consecutive blocks
  REGION
    --inside=pattern     select matches inside of pattern
    --outside=pattern    select matches outside of pattern
    --include=pattern    reduce matches to the area
    --exclude=pattern    reduce matches to outside of the area
    --strict             strict mode for --inside/outside --block
  CHARACTER CODE
    --icode=name         file encoding
    --ocode=name         output encoding
  FILTER
    --if,--of=filter     input/output filter command
    --pf=filter          post process filter command
    --noif               disable default input filter
  RUNTIME FUNCTION
    --begin=func         call function before search
    --end=func           call function after search
    --prologue=func      call function before command execution
    --epilogue=func      call function after command execution
    --postgrep=func      call function after each grep operation
    --callback=func      callback function for matched string
  OTHER
    --usage[=expand]     show this message
    --exit=n             command exit status
    --norc               skip reading startup file
    --man                display command or module manual page
    --show               display module file
    --path               show module file path
    --error=action       action after read error
    --warn=type          run time error control
    --alert [name=#]     set alert parameter (size/time)
    -d flags             display info (f:file d:dir c:color m:misc s:stat)

=cut

## Loader object used below for the startup file (~/.greplerc), for
## module handling on @ARGV (deal_with) and as the source of the extra
## "builtins" option specs.  BASECLASS is passed to Getopt::EX::Loader
## as-is; presumably these are the namespaces searched for modules --
## confirm against the Getopt::EX::Loader documentation.
my @baseclass = qw( App::Greple Getopt::EX );
my $rcloader = Getopt::EX::Loader
    ->new(BASECLASS => \@baseclass);

my @optargs;	# flat (spec => destination) list handed to getoptions
my %optargs;	# primary option name => destination (CODE excluded)

##
## newopt(SPEC => DEST, ...): register option definitions.
##
## Each SPEC has all whitespace removed, and a leading name which
## contains "-" gets an underscore alias prepended ("a-b" becomes
## "a_b|a-b").  The normalized pair is appended to @optargs.  Unless
## DEST is a code reference, it is also recorded in %optargs under the
## first word of the normalized spec.
##
sub newopt {
    my @normalized;
    for my $pair (pairs @_) {
	my($spec, $dest) = @$pair;
	$spec =~ s/\s+//g;
	$spec =~ s/^(?=\w+-)([-\w]+)/$1 =~ tr[-][_]r . "|$1"/e;
	if (ref $dest ne 'CODE' and $spec =~ /^(\w+)/) {
	    $optargs{$1} = $dest;
	}
	push @normalized, $spec, $dest;
    }
    push @optargs, @normalized;
}

##
## opt(NAME [, KEY]): lvalue accessor for a destination registered in
## %optargs.
##
## - scalar reference: yields the scalar itself, so that
##   opt('color') = 'never' assigns to the option variable.
## - hash reference with exactly one extra argument: yields the
##   element for that key.
## - anything else: yields the stored reference as is.
##
## Dies when NAME has no (true) entry in %optargs.
##
sub opt :lvalue {
    my $name = shift;
    my $var = $optargs{$name} or die "$name: invalid option name\n";
    if (ref $var eq 'SCALAR') {
	return $$var;
    } elsif (ref $var eq 'HASH' and @_ == 1) {
	return $var->{+shift};	# +shift: call shift(), not the key "shift"
    } else {
	return $var;
    }
}

my @opt_pattern;

##
## Handler for the pattern options.  Stores its arguments, each
## converted to a plain string, as one array reference at the end of
## @opt_pattern and returns that reference.
##
sub opt_pattern {
    my @entry = map { "$_" } @_;
    push @opt_pattern, \@entry;
    return $opt_pattern[-1];
}

my @opt_colormap;

## Handler for --colormap: keep the option value (second argument).
sub opt_colormap {
    my(undef, $spec) = @_;
    push @opt_colormap, $spec;
}

## Handler for --colorsub: keep the value wrapped as "sub{ ... }".
sub opt_colorsub {
    my(undef, $code) = @_;
    push @opt_colormap, "sub{ $code }";
}

##
## Initial contents of the hash-type options:
##   %opt_format  destination of --format
##   %opt_alert   destination of --alert
##   %opt_warn    updated by the --warn callback below
##
my %opt_format = (LINE => '%d:', FILE => '%s:');
my %opt_alert  = (size => 512 * 1024, time => 2);
my %opt_warn   = (read => 0, skip => 1, retry => 0, begin => 0);

##
## Option table.  Each pair is an option spec and its destination
## (variable reference or callback).  The spaces inside the specs are
## only for alignment: newopt() removes all whitespace and adds an
## underscore alias for names containing "-".  Most destinations are
## declared in place with "\ my $var", so this call also introduces
## the $opt_* / @opt_* variables used by the rest of the script.
##
newopt

    ##
    ## PATTERN
    ##
    ' and    |e =s ' => \&opt_pattern ,
    ' must   |r =s ' => \&opt_pattern ,
    ' may    |t =s ' => \&opt_pattern ,
    ' not    |v =s ' => \&opt_pattern ,
    ' le     |x =s ' => \&opt_pattern ,
    ' re     |E =s ' => \&opt_pattern ,
    ' fe        =s ' => \&opt_pattern ,
    ' file   |f =s ' => \ my @opt_f ,
    ' select    =s ' => \ my $opt_select ,

    ##
    ## MATCH
    ##
    ' ignore-case        |i  !  ' => \ my $opt_i ,
    ' need                   =s ' => \ my @opt_need ,
    ' allow                  =s ' => \ my @opt_allow ,
    ' matchcount         |mc =s ' => \ my $opt_matchcount ,
    ' capture-group      |G  !  ' => \ my $opt_capture_group ,
    ' stretch            |S  !  ' => \ my $opt_stretch ,

    ##
    ## STYLE
    ##
    ' files-with-matches |l     ' => \ my $opt_l ,
    ' count              |c     ' => \ my $opt_c ,
    ' line-number        |n  !  ' => \ my $opt_n ,
    ' filename           |H     ' => \ my $opt_H ,
    ' no-filename        |h     ' => \ my $opt_h ,
    ' only-matching      |o  !  ' => \ my $opt_o ,
    ' all                    !  ' => \ my $opt_all ,
    ' max-count          |m  =s ' => \ my $opt_m ,
    ' after-context      |A  :2 ' => \(my $opt_A = 0) ,
    ' before-context     |B  :2 ' => \(my $opt_B = 0) ,
    ' context            |C  :2 ' => \(my $opt_C = 0) ,
    ' join                   !  ' => \ my $opt_join ,
    ' joinby                 =s ' => \(my $opt_joinby = "") ,
    ' newline                !  ' => \(my $opt_newline = 1) ,
    ' filestyle          |fs =s ' => \(my $opt_filestyle = 'line') ,
    ' linestyle          |ls =s ' => \(my $opt_linestyle = 'line') ,
    # --separate: both styles receive $_[0]; presumably the option
    # name "separate" as passed by Getopt::Long -- confirm.
    ' separate ' => sub {
	opt('filestyle') = opt('linestyle') = $_[0];
    },
    ' format                 =s ' => \    %opt_format ,
    ' frame-top              :s ' => \(my $opt_frame_top    = '') ,
    ' frame-middle           :s ' => \(my $opt_frame_middle = '') ,
    ' frame-bottom           :s ' => \(my $opt_frame_bottom = '') ,

    ##
    ## FILE
    ##
    ' glob                   =s ' => \ my @opt_glob ,
    ' chdir                  =s ' => \ my @opt_chdir ,
    ' readlist               !  ' => \ my $opt_readlist ,

    ##
    ## COLOR
    ##
    ' color                  =s ' => \(my $opt_color = 'auto') ,
    ' colormap           |cm =s ' => \&opt_colormap ,
    ' colorsub           |cs =s ' => \&opt_colorsub ,
    ' colorful              !   ' => \(my $opt_colorful = 1) ,
    ' colorindex         |ci =s ' => \(my $opt_colorindex = '') ,
    ' ansicolor              =s ' => \(my $opt_ansicolor = '256') ,
    ' regioncolor        |rc !  ' => \ my $opt_regioncolor ,
    ' uniqcolor          |uc !  ' => \(my $opt_uniqcolor) ,
    ' uniqsub            |us =s ' => \ my @opt_uniqsub ,
    ' face                   =s ' => \ my @opt_face ,
    ' nocolor | no-color ' => sub {
	opt('color') = 'never';
    },
    # --256 / --no256 select the value of --ansicolor
    ' 256! ' => sub {
	opt('ansicolor') = $_[1] ? '256' : '16';
    },
    # --random appends "R" to --colorindex, --norandom removes every R/r
    ' random! ' => sub {
	if ($_[1]) { opt('colorindex') .= 'R' }
	else       { opt('colorindex') =~ s/R//gi }
    },

    ##
    ## BLOCK
    ##
    ' paragraph          |p  !  ' => \ my $opt_p ,
    ' border                 =s ' => \ my $opt_border ,
    ' block                  =s ' => \ my @opt_block ,
    ' blockend               :s ' => \(my $opt_blockend) ,
    ' join-blocks            !  ' => \(my $opt_join_blocks = 0) ,

    ##
    ## REGION
    ##
    ' inside                 =s ' => \ my @opt_inside ,
    ' outside                =s ' => \ my @opt_outside ,
    ' include                =s ' => \ my @opt_include ,
    ' exclude                =s ' => \ my @opt_exclude ,
    ' strict                 !  ' => \(my $opt_strict = 0) ,

    ##
    ## CHARACTER CODE
    ##
    ' icode                  =s ' => \ my @opt_icode ,
    ' ocode                  =s ' => \ my $opt_ocode ,

    ##
    ## FILTER
    ##
    ' if                     =s ' => \ my @opt_if ,
    ' of                     =s ' => \ my @opt_of ,
    ' pf                     =s ' => \ my @opt_pf ,
    ' noif                      ' => \ my $opt_noif ,

    ##
    ## RUNTIME FUNCTION
    ##
    ' print                  =s ' => \ my @opt_print ,
    ' continue               !  ' => \ my $opt_continue ,
    ' callback               =s ' => \ my @opt_callback ,
    ' begin                  =s ' => \ my @opt_begin ,
    ' end                    =s ' => \ my @opt_end ,
    ' prologue               =s ' => \ my @opt_prologue ,
    ' epilogue               =s ' => \ my @opt_epilogue ,
    ' postgrep               =s ' => \ my @opt_postgrep ,

    ##
    ## OTHERS
    ##
    ' usage                  :s ' => \ my $opt_usage ,
    ' exit                   =i ' => \ my $opt_exit ,
    # norc: not in this table; it is checked as the first command
    # line argument before option parsing.
    ' man  | doc                ' => \ my $opt_man ,
    ' show | less               ' => \ my $opt_show ,
    ' path                      ' => \ my $opt_path ,
    ' error                  =s ' => \(my $opt_error = 'skip') ,
    ' alert                  =i ' => \    %opt_alert ,
    ' debug              |d  =s ' => \ my @opt_d ,
    ' persist                   ' => sub {
	warn "Option --persist will be deprecated. Use --error=retry.\n";
	opt('error') = 'retry';
    },
    # --warn: every word found in $_[1] becomes a key of %opt_warn set
    # to $_[2]; the word "all" stands for all existing keys.
    # Presumably $_[1]/$_[2] are the key and value of the hash-type
    # option -- confirm against Getopt::Long.
    'warn|w:1%' => sub {
	map { $opt_warn{$_} = $_[2] }
	map { $_ eq 'all' ? keys %opt_warn : $_ }
	$_[1] =~ /\w+/g;
    },

    ##
    ## MODULE
    ##
    # Reaching -M here means it was not consumed earlier: report the
    # usage error, list the modules known to the loader, and exit 2.
    'M:s' => sub {
	warn "Use -M option at the beginning with module name.\n";
	if (my @modules = uniq($rcloader->modules())) {
	    warn "Available modules:\n";
	    warn "\t", join("\n\t", @modules), "\n";
	}
	exit 2;
    },

    ##
    ## UNDOCUMENTED
    ##
    ' clean                  !  ' => \ my $opt_clean ,

;

##
## setopt([ { append => BOOL }, ] NAME, VALUE...): set an option by name.
##
## The destination is looked up in %optargs; an unknown NAME is
## silently ignored.  Depending on the destination type:
##   ARRAY   replaced by the values, or extended when "append" is true
##   CODE    called with NAME followed by the values
##   SCALAR  set to the first value
## Any other destination type dies with "Object error.".
##
sub setopt {
    my $flags = ref $_[0] eq 'HASH' ? shift : {};
    my($name, @values) = @_;
    if (exists $optargs{$name}) {
	my $dest = $optargs{$name};
	my $type = ref $dest;
	if ($type eq 'ARRAY') {
	    if ($flags->{append}) {
		push @$dest, @values;
	    } else {
		@$dest = @values;
	    }
	}
	elsif ($type eq 'CODE') {
	    $dest->($name, @values);
	}
	elsif ($type eq 'SCALAR') {
	    $$dest = shift @values;
	}
	else {
	    die "Object error.";
	}
    }
}

##
## @ARGV stuff
##
## Order matters here: the startup file is loaded first, then the
## loader processes @ARGV, then GREPLEOPTS is put in front of what is
## left, and finally everything is parsed against @optargs.
##

require Getopt::Long;
my $parser = Getopt::Long::Parser->new(
    config => [ qw(bundling no_getopt_compat no_ignore_case) ],
    );
## Forward extra configuration keywords to the option parser.
sub configure_getopt { $parser->configure(@_) }

configure_getopt qw(debug) if $ENV{DEBUG_GETOPT};
$Getopt::EX::Loader::debug = 1 if $ENV{DEBUG_GETOPTEX};

## decode
## arguments which are not yet character strings are decoded as UTF-8
@ARGV = map { utf8::is_utf8($_) ? $_ : decode('utf8', $_) } @ARGV;

## ~/.greplerc
## Skipped when the very first argument is --norc (the "shift" removes
## it from @ARGV) or when GREPLE_NORC is set in the environment.
unless ((@ARGV and $ARGV[0] eq "--norc" and shift)
	or
	($ENV{GREPLE_NORC}) ) {
    $rcloader->load(FILE => "$ENV{HOME}/.greplerc");
}

## modules
$rcloader->deal_with(\@ARGV);

## option specs supplied by the loader are parsed along with our own
push @optargs, $rcloader->builtins;

## ENV
## words of GREPLEOPTS go in front of the remaining arguments
$ENV{'GREPLEOPTS'} and unshift @ARGV, shellwords($ENV{'GREPLEOPTS'});


## GetOptions
my @SAVEDARGV = @ARGV;		# kept for the -do report below
$parser->getoptions(@optargs) || usage();

## -d values are split into single characters; %opt_d is the flag set
our %opt_d;
@opt_d = map { split // } @opt_d;
@opt_d{@opt_d} = @opt_d;

if ($opt_d{o}) {
    warn "\@ARGV = ", join(' ', shellquote(@SAVEDARGV)), "\n";
}

## -m option
##
## The value is a comma separated list read as (offset, length) pairs;
## an odd number of fields is completed with an empty length.  Every
## pair becomes a closure which splices the array referenced by its
## first argument: with both offset and length when a length is given,
## from the offset to the end otherwise.  An empty offset counts as 0.
my @splicer;
if (defined $opt_m) {
    my @field = split /,/, $opt_m, -1;
    push @field, '' if @field % 2;
    die "$opt_m: option format error.\n"
	if notall { /^(-?\d+)?$/ } @field;
    for my $pair (pairs @field) {
	my($from, $count) = @$pair;
	push @splicer,
	    $count eq ''
	    ? sub { splice @{+shift}, $from || 0 }
	    : sub { splice @{+shift}, $from || 0, $count };
    }
}

## Encoding defaults.  $file_code is assigned later by the --icode
## handling; the output encoding takes effect on STDOUT right here.
my $default_icode = 'utf8';	# default input encoding
my $default_ocode = 'utf8';	# default output encoding
my @default_icode_list = qw(euc-jp 7bit-jis);
my $file_code;
my $output_code = $opt_ocode || $default_ocode;
binmode STDOUT, ":encoding($output_code)";

## show unused option characters
##
## With -du, print the digits and letters as three lines in which every
## character appearing as a "|x" single-character alias in @optargs is
## replaced by ".", then terminate.
if ($opt_d{u}) {
    my $chart = join '', ('0'..'9', "\n", 'a'..'z', "\n", 'A'..'Z', "\n");
    for my $spec (@optargs) {
	$chart =~ s/$1/./ if $spec =~ /\|([0-9a-zA-Z])\b/;
    }
    die $chart;
}

## show man pages
##
## --man, --show and --path.  When the last loaded bucket is a real
## module (not the startup file or the default module), the request is
## about that module:
##   --man   run perldoc on it
##   --path  print the path of its file
##   --show  open its file in $PAGER (default "less")
## Otherwise the manual of this command itself is shown.
if ($opt_man or $opt_show or $opt_path) {
    my @bucket = $rcloader->buckets;
    if (@bucket and not default_module($bucket[-1])) {
	my $module = $bucket[-1]->module;
	# use perldocjp when a PATH directory has an executable one
	my $jp = first { -x "$_/perldocjp" } split /:/, $ENV{PATH};
	my $perldoc = $jp ? "perldocjp" : "perldoc";
	# let the child process see the same module search path
	$ENV{PERL5LIB} = join ':', @INC;
	my $file = $module =~ s[::][/]gr . '.pm';
	# a bare die would only print "Died"; say what is wrong
	die "$module: module is not loaded.\n" unless $INC{$file};
	if ($opt_man) {
	    exec "$perldoc $module" or die $!;
	} else {
	    # qx gives undef when the command could not be started, and
	    # an empty string when perldoc printed nothing
	    my $path = `$perldoc -ml $module` // '';
	    chomp $path;
	    die "$module: $perldoc could not locate the module file.\n"
		if $path eq '';
	    if ($opt_path) {
		say $path;
	    } else {
		my $pager = $ENV{PAGER} || 'less';
		exec "$pager $path" or die $!;
	    }
	}
	exit;
    }
    pod2usage({-verbose => 2});
    die;
}

##
## default_module(BUCKET): return 1 when the name given by
## BUCKET->module ends with ".greplerc" or "default" preceded by a
## word boundary, 0 otherwise.
##
## NOTE(review): \b before "\.greplerc" needs a word character right
## in front of the dot, so a name like "/dir/.greplerc" does not match
## -- confirm what ->module returns for the startup file.
##
sub default_module {
    my $name = shift->module;
    for my $re (qr/\b \.greplerc $/x, qr/\b default $/x) {
	return 1 if $name =~ $re;
    }
    return 0;
}

## setup file encoding
##
## --icode values may be repeated and may hold several names separated
## by commas or whitespace.  The result is $file_code: a single
## encoding name, or 'Guess' with the candidates registered through
## Encode::Guess->set_suspects.
if (@opt_icode) {
    @opt_icode = map { split /[,\s]+/ } @opt_icode;
    # s/// edits the names in place: a leading "+" is removed, and if
    # any name had one the default candidates are put in front.
    if (grep { s/^\+// } @opt_icode) {
	unshift @opt_icode, @default_icode_list;
    }
    @opt_icode = uniq @opt_icode;
    if (@opt_icode > 1) {
	# several names: guess among them ("auto"/"guess" are dropped)
	@opt_icode = grep { !/(?:auto|guess)$/i } @opt_icode;
	Encode::Guess->set_suspects(@opt_icode);
	$file_code = 'Guess';
    }
    elsif ($opt_icode[0] =~ /^(?:guess|auto)$/i) {
	# only "guess"/"auto": guess among the default candidates
	Encode::Guess->set_suspects(@default_icode_list);
	$file_code = 'Guess';
    } else {
	$file_code = $opt_icode[0];
    }
}
else {
    $file_code = $default_icode;
}

##
## Patterns
##
## Collect every search pattern in $pat_holder: first the -f files,
## then the entries recorded by the pattern options in @opt_pattern.
##

my $pat_holder = App::Greple::Pattern::Holder->new;

## flags common to all patterns
my $FLAG_BASE = FLAG_NONE;
$FLAG_BASE |= FLAG_IGNORECASE if $opt_i;

if (@opt_f) {
    for my $opt_f (@opt_f) {
	$pat_holder->append({ flag => $FLAG_BASE, type => 'file',
			      $opt_select ? (select => $opt_select) : (),
			    },
			    $opt_f);
    }
} else {
    # Without -f, if no pattern option other than --not/--may was
    # given, the first remaining argument is taken as an "le" pattern;
    # usage is called when there is none.
    unless (grep { $_->[0] !~ /^(not|may)/ } @opt_pattern) {
	unshift @opt_pattern, [ le => shift @ARGV // &usage ];
    }
}

## pattern option name => flags of the pattern
my %pat_flag = (
    must => FLAG_REGEX | FLAG_COOK | FLAG_REQUIRED,
    not  => FLAG_REGEX | FLAG_COOK | FLAG_NEGATIVE,
    may  => FLAG_REGEX | FLAG_COOK | FLAG_OPTIONAL,
    le   => FLAG_REGEX | FLAG_COOK | FLAG_LEXICAL,
    and  => FLAG_REGEX | FLAG_COOK,
    re   => FLAG_REGEX,
    fe   => FLAG_NONE,
);
for (@opt_pattern) {
    # each entry is [ option-name, value... ] as stored by opt_pattern
    my($attr, @opt) = @$_;
    my $flag = $FLAG_BASE | $pat_flag{$attr};
    $pat_holder->append({ flag => $flag, type => 'pattern' }, @opt);
}
# $pat_holder->optimize;

##
## When at least one positive pattern is optional, add FLAG_REQUIRED
## to every positive pattern which is not optional.
##
{
    my @positive = grep { $_->is_positive } $pat_holder->patterns;
    if (grep { $_->is_optional } @positive) {
	$_->flag($_->flag | FLAG_REQUIRED)
	    for grep { !$_->is_optional } @positive;
    }
}

##
## set $count_must, $count_need and $count_allow
##
##   $count_must   number of required patterns
##   $count_need   initially 0 when there are required patterns,
##                 otherwise the number of positive patterns;
##                 adjusted by each --need value
##   $count_allow  initially 0; adjusted by each --allow value
##
my $count_must = 0;
my $count_need;
my $count_allow = 0;
{
    my $must = grep({ $_->is_required } $pat_holder->patterns);
    # positive patterns excluding the required ones
    my $posi = grep({ $_->is_positive } $pat_holder->patterns) - $must;
    my $nega = grep({ $_->is_negative } $pat_holder->patterns);

    $count_must = $must // 0;
    $count_need = $must ? 0 : $posi;
    for (@opt_need) {
	if (/^-(\d+)$/) {	# --need -n
	    $count_need = $posi - $1;
	}
	elsif (/^\+(\d+)$/) {	# --need +n
	    $count_need += $1;
	}
	elsif (/^(\d+)$/) {	# --need n
	    $count_need = $1 - $must;
	}
	else {
	    die "$_ is not valid count.\n"
	}
    }

    $count_allow = 0;
    for (@opt_allow) {
	if (/^-(\d+)$/) {	# --allow -n
	    $count_allow = $nega - $1;
	}
	elsif (/^\+(\d+)$/) {	# --allow +n
	    $count_allow += $1;
	}
	elsif (/^(\d+)$/) {	# --allow n
	    $count_allow = $1;
	}
	else {
	    die "$_ is not valid count.\n"
	}
    }
}

##
## --matchcount
##
## $count_match_sub stays undefined when the option value is false.
## A single number makes a test for exactly that count.  Otherwise the
## comma separated numbers are read as (min, max) pairs, where an empty
## field is 0 and a missing last max is -1.  The pairs are tried in
## order: a count below min fails at once, and a count passes when max
## is 0 or less, or when the count does not exceed max.
##
my $count_match_sub = sub {
    my $spec = shift;
    return unless $spec;
    die "$spec is not valid count.\n" if $spec =~ /[^\d,]/;
    my @limit = map { length($_) ? int($_) : 0 } split /,/, $spec, -1;
    return sub { $_[0] == $limit[0] } if @limit == 1;
    push @limit, -1 if @limit % 2;
    return sub {
	my $count = shift;
	for (my $i = 0; $i < @limit; $i += 2) {
	    my($min, $max) = @limit[$i, $i + 1];
	    return 0 if $count < $min;
	    return 1 if $max <= 0 || $count <= $max;
	}
	return 0;
    };
}->($opt_matchcount);

##
## setup input/output filter
##
## $filter_d starts from the --if values.  Unless --noif is given, the
## default entries below are appended; each one pairs a code reference
## operating on $_ with a command string.  Presumably $_ is the file
## name and the command is used when the code returns true -- confirm
## in App::Greple::Filter.
##
my $filter_d = App::Greple::Filter->new->parse(@opt_if);
unless ($opt_noif) {
    $filter_d->append(
	[ sub { s/\.Z$//   }, 'zcat' ],
	[ sub { s/\.g?z$// }, 'gunzip -c' ],
	[ sub { m/\.pdf$/i }, 'pdftotext -nopgbrk - -' ],
	[ sub { s/\.gpg$// }, 'gpg --quiet --no-mdc-warning --decrypt' ],
	);
}

##------------------------------------------------------------
## miscellaneous setups
##
## Decide the list of targets and whether file names are displayed.
##

my @argv_files;
my $start_directory;
my $need_filename = ($opt_H or $opt_l);
my $current_file;

if (@opt_chdir) {
    # --chdir: remember where we started, expand the directory globs,
    # and move all file arguments from @ARGV to @argv_files.
    $start_directory = getcwd;
    @opt_chdir = uniq(map { glob $_ } @opt_chdir);
    push @argv_files, splice(@ARGV);
    # file names are needed unless -h is given or there is exactly one
    # directory, one file argument and no --glob
    unless ($opt_h or
	    (@opt_chdir == 1 and @argv_files == 1 and @opt_glob == 0)) {
	$need_filename++;
    }
}
elsif (@opt_glob) {
    # without --chdir, --glob patterns are expanded right away
    push @ARGV, map(glob, @opt_glob);
}

## nothing to read at all: add "-" (presumably standard input -- confirm
## in the file opening code)
push(@ARGV, '-') unless @ARGV || @argv_files || @opt_glob || $opt_readlist;
if ((@ARGV > 1 or $opt_readlist) and not $opt_h) {
    $need_filename++;
}

$opt_filestyle = 'none' if not $need_filename;

## a non-empty --joinby turns --join on
$opt_join = 1 if $opt_joinby ne "";

##------------------------------------------------------------
## colors
##
## %colormap holds the colors of labeled items and @colors the indexed
## color list; both are given to the color handler and updated from
## the --colormap / --colorsub values.
##
my %colormap = (
    FILE     => "G",
    LINE     => "Y",
    TEXT     => "",
    BLOCKEND => "/WE",
    PROGRESS => "B",
    TOP      => "",
    MIDDLE   => "",
    BOTTOM   => "",
    );

my @colors;

use Getopt::EX::Colormap;
my $color_handler = Getopt::EX::Colormap
    ->new(HASH => \%colormap, LIST => \@colors)
    ->load_params(@opt_colormap);

## built-in indexed colors: one set for --ansicolor=16, one for the rest
my @default_color =
    $opt_ansicolor eq '16'
    ? qw(RD GD BD CD MD YD)
    : qw(000D/544 000D/454 000D/445
	 000D/455 000D/545 000D/554
	 000D/543 000D/453 000D/435
	 000D/534 000D/354 000D/345
	 000D/444
	 000D/433 000D/343 000D/334
	 000D/344 000D/434 000D/443
	 000D/333)
    ;

## No indexed color given by the user: use all the defaults, or only
## the first one with --nocolorful.
if ($color_handler->list == 0) {
    $color_handler->append
	($opt_colorful ? @default_color : $default_color[0]);
}

if ($opt_ansicolor eq '24bit') {
    no warnings 'once';
    $Getopt::EX::Colormap::RGB24 = 1;
}

##
## --face: apply visual effect changes to every indexed color.
##
## Each option value is a sequence of items "-chars", "+chars",
## "=chars" or bare "chars":
##   -chars  remove those characters from the color spec
##   =chars  replace the color spec by chars
##   +chars, chars
##           append "^" and the characters not yet present in the spec
##
for my $opt (@opt_face) {
    while ($opt =~ /(?<mk>[-+=]) (?<s>[^-+=]*) | (?<s>[^-+=]+) /xg) {
	my($mk, $s) = ($+{mk} // '', $+{s});
	for my $c (@colors) {
	    if ($mk eq '-') {
		# Use the saved $s: %+ refers to the last successful
		# match, which is fragile once other regexes run here.
		$c =~ s/[\Q$s\E]//g if $s ne '';
	    } elsif ($mk eq '=') {
		$c = $s;
	    } else {
		# An empty spec (e.g. after "=") would produce the
		# character class "[]", which is a regex compile error;
		# nothing can be already present in that case.
		my $spec = $c eq '' ? $s : $s =~ s/[\Q$c\E]//gr;
		$c .= '^' . ${spec} if $spec;
	    }
	}
    }
}

## Color is used with --color=always, or with --color=auto when -o is
## not given and STDOUT is a terminal.
my $need_color = (($opt_color eq 'always')
		  or (($opt_color eq 'auto') and (!$opt_o and -t STDOUT)));

if (!$need_color) {
    $Getopt::EX::Colormap::NO_COLOR = 1;
}

## In --format values, "\t" and "\n" become tab and newline; any other
## backslashed character stands for itself.
for (values %opt_format) {
    s{\\(.)}{ { t => "\t", n => "\n" }->{$1} // $1 }egx;
}

## block end mark: "--" unless --blockend is given, in which case the
## two characters "\n" in its value are turned into a newline
my $blockend = "--";
if (defined $opt_blockend) {
    $blockend = $opt_blockend =~ s/\\n/\n/gr;
}

## $_file, $_line and $_text are closures which format and color their
## argument on each call; the block end mark and the frame strings are
## colored once here.
my $_file     = sub { $color_handler->color('FILE' , sprintf($opt_format{FILE}, $_[0])) };
my $_line     = sub { $color_handler->color('LINE' , sprintf($opt_format{LINE}, $_[0])) };
my $_text     = sub { $color_handler->color('TEXT' , $_[0]) };
my $_blockend = $color_handler->color('BLOCKEND', $blockend);
my $_top      = $color_handler->color('TOP'     , $opt_frame_top);
my $_middle   = $color_handler->color('MIDDLE'  , $opt_frame_middle);
my $_bottom   = $color_handler->color('BOTTOM'  , $opt_frame_bottom);

## index_color(...): pass all arguments to the color handler's
## index_color method and return its result.
sub index_color { return $color_handler->index_color(@_) }

## color(...): pass all arguments to the color handler's color method
## and return its result.
sub color { return $color_handler->color(@_) }

## Index object for --uniqcolor; the --uniqsub functions are given as
## its "prepare" parameter.
my $uniq_color = UniqIndex->new(
    ignore_newline => 1,
    prepare => \@opt_uniqsub,
    );

## Write one line per entry of $uniq_color to STDERR: the index, its
## count, and the entry shown in the color of that index.
sub dump_uniqcolor {
    my $entries = $uniq_color->list;
    my $counts  = $uniq_color->count;
    for my $index (0 .. $#$entries) {
	my $colored = index_color($index, $entries->[$index]);
	warn sprintf("%3d (%3d) %s\n", $index, $counts->[$index], $colored);
    }
}

# --colorindex
#
# The option value is a set of letters, taken case-insensitively:
#   S     shuffle @colors once at startup
#   A, D  create an Indexer with a counter starting at 0; B is passed
#         as "block" and D as "reverse"
#   R     create an Indexer giving a random index into @colors
#         (only when neither A nor D is given)
# $indexer is undef when none of A, D and R is given.
my $indexer = do {
    my %ci = map { uc $_ => 1 } $opt_colorindex =~ /\w/g;
    if ($ci{S}) {
	@colors = shuffle @colors;
    }
    if ($ci{A} or $ci{D}) {
	my $i = 0;
	Indexer->new(
	    index   => sub { $i++   },
	    reset   => sub { $i = 0 },
	    block   => $ci{B},
	    reverse => $ci{D},
	    );
    }
    elsif ($ci{R}) {
	Indexer->new(index => sub { int rand @colors });
    }
    else { undef }
};

# -dc
#
# Dump %colormap and @colors to STDERR.  In the dumped text, a
# single-quoted string without spaces which begins a line is passed
# through color() with itself as both arguments, so that it appears
# in its own color.
if ($opt_d{c}) {
    my $dump = sub {
	local $_ = Dumper shift;
	s/^\s*'\K([^'\s]+)(?=')/color($1, $1)/mge;
	$_;
    };
    warn 'colormap = ', $dump->(\%colormap);
    warn 'colors = ', $dump->(\@colors);
}

##
## border regex
##
## --border gives a custom pattern (compiled with /m), -p selects the
## paragraph border, and the default border is the start of each line.
##
my $border_re =
    $opt_border ? qr/$opt_border/m :		# custom
    $opt_p      ? qr/(?:\A|\R)\K\R+/ :		# paragraph
		  qr/^/m;			# line

## -C supplies the value of -A and -B when they are not set themselves
if ($opt_C) {
    $_ ||= $opt_C for $opt_A, $opt_B;
}
## Run-time counters, printed by -ds at the end of the run.  The key
## set is locked so that access to an unknown key is fatal.
my %stat = (
    file_searched => 0,
    file_tried => 0,
    length => 0,
    match_effective => 0,
    match_positive => 0,
    match_negative => 0,
    match_block => 0,
    read_retry => 0,
    time_start => [],
    time_end => [],
    );
lock_keys %stat;

##
## Setup functions
##
## Replace, in place, the string values of the function-type options
## by the result of parse_func().  Each row is [ option name, value
## list, pattern flag ].  For rows with the flag set the value is
## normally a pattern, so only values starting with "&" and a word
## character are treated as functions.  Values which are already
## callable are left alone.
##
for my $set (
    [ "print"   , \@opt_print   , 0 ],
    [ "begin"   , \@opt_begin   , 0 ],
    [ "end"     , \@opt_end     , 0 ],
    [ "prologue", \@opt_prologue, 0 ],
    [ "epilogue", \@opt_epilogue, 0 ],
    [ "callback", \@opt_callback, 0 ],
    [ "uniqsub" , \@opt_uniqsub , 0 ],
    [ "postgrep", \@opt_postgrep, 0 ],
    [ "block"   , \@opt_block   , 1 ], # need &
    [ "inside"  , \@opt_inside  , 1 ], # need &
    [ "outside" , \@opt_outside , 1 ], # need &
    [ "include" , \@opt_include , 1 ], # need &
    [ "exclude" , \@opt_exclude , 1 ], # need &
    ) {
    my($cat, $opt, $pattern) = @$set;
    for (@{$opt}) {
	next if callable $_;
	/^&\w+/ or next if $pattern;
	# $_ aliases the array element, so the option value is replaced
	$_ = parse_func($_) or die "$cat function format error: $_\n";
    }
}

## Register the region options in $regions, in this order of options.
## --inside/--outside use REGION_UNION and --include/--exclude use
## REGION_INTERSECT, each combined with REGION_INSIDE or
## REGION_OUTSIDE.
my $regions = App::Greple::Regions::Holder->new;
for my $set (
    [ \@opt_inside,  REGION_INSIDE  | REGION_UNION     ],
    [ \@opt_outside, REGION_OUTSIDE | REGION_UNION     ],
    [ \@opt_include, REGION_INSIDE  | REGION_INTERSECT ],
    [ \@opt_exclude, REGION_OUTSIDE | REGION_INTERSECT ])
{
    my($opt, $flag) = @$set;
    for my $spec (@$opt) {
	$regions->append(FLAG => $flag, SPEC => $spec);
    }
}

##------------------------------------------------------------

## -dm: report every search pattern with its type, a "&" mark for
## function patterns, and the regex (or the string when there is no
## regex), followed by the must/need/allow counts.
if ($opt_d{m}) {
    warn "Search pattern:\n";
    my $i;
    for my $pat ($pat_holder->patterns) {
	my $type =
	    $pat->is_required ? 'must' :
	    $pat->is_negative ? 'not' :
	    $pat->is_positive ? 'and' : 'else';
	my $target = $pat->regex // $pat->string;
	warn sprintf("  %4s %1s %s\n",
		     $type,
		     $pat->is_function ? '&' : '',
		     # indexed colors are used only when there are several
		     @colors > 1 ? index_color($i++, $target) : $target);
    }
    warn sprintf("must = %d, need = %d, allow = %d\n",
		 $count_must, $count_need, $count_allow);
}

## push post-process filter
## (the --pf commands are attached to STDOUT)
if (@opt_pf) {
    push_output_filter(\*STDOUT, @opt_pf);
}

## --usage: show the usage; exit here if usage() returns at all with a
## true value
usage() and exit if defined $opt_usage;

## Keep duplicates of the three standard handles so that they can be
## restored after being reopened or closed.
open SAVESTDIN,  '<&', \*STDIN  or die "open: $!";
open SAVESTDOUT, '>&', \*STDOUT or die "open: $!";
open SAVESTDERR, '>&', \*STDERR or die "open: $!";

## Reopen STDIN from the saved duplicate.
sub recover_stdin {
    open STDIN, '<&', \*SAVESTDIN or die "open: $!";
}
## Reopen STDERR from the saved duplicate, as a UTF-8 encoded handle.
sub recover_stderr {
    open STDERR, '>&', \*SAVESTDERR or die "open: $!";
    binmode STDERR, ':encoding(utf8)';
}
## Close the current STDOUT and reopen it from the saved duplicate.
sub recover_stdout {
    close STDOUT;
    open STDOUT, '>&', \*SAVESTDOUT or die "open: $!";
}
## Close both the saved duplicate and STDOUT itself.
sub close_stdout {
    close SAVESTDOUT;
    close STDOUT;
}

## Read from the saved (original) standard input; one line or all
## lines depending on the calling context.
sub read_stdin { <SAVESTDIN> }

## $slurp: code reference which reads everything from STDIN and returns
## it as one string.  The variant depends on --error and --warn read.
my $slurp = do {
    ##
    ## Making utf8 warnings fatal makes it easy to find code conversion
    ## errors, so you can choose an appropriate file code or automatic
    ## code recognition, but you lose the chance to find a string in an
    ## unrelated area.
    ##
    if ($opt_error =~ /^(?: fatal | skip | retry )$/x) {
	if ($opt_warn{read}) {
	    # read error: report $@ and return what eval gave (undef)
	    sub {
		use warnings FATAL => 'utf8';
		my $stdin = eval { local $/; <STDIN> };
		warn $@ if $@;
		$stdin;
	    }
	} else {
	    # read error: return what eval gave (undef) without a report
	    sub {
		use warnings FATAL => 'utf8';
		eval { local $/; <STDIN> };
	    }
	}
    } elsif ($opt_error eq 'ignore') {
	if ($opt_warn{read}) {
	    sub { local $/; <STDIN> };
	} else {
	    # keep the read quiet by closing STDERR while reading
	    sub {
		close STDERR;
		my $stdin = do { local $/; <STDIN> };
		recover_stderr;
		$stdin;
	    }
	}
    } else {
	die "$opt_error: invalid action.\n";
    }
};

use Term::ANSIColor::Concise qw(ansi_code);

## Terminal control strings.  All except CR are produced by ansi_code()
## from the sequence name given in braces.
use constant {
    EL  => ansi_code('{EL}'),    # Erase Line
    ED  => ansi_code('{ED}'),    # Erase Display
    SCP => ansi_code('{SCP}'),   # Save Cursor Position
    RCP => ansi_code('{RCP}'),   # Restore Cursor Position
    DSC => ansi_code('{DECSC}'), # DEC Save Cursor
    DRC => ansi_code('{DECRC}'), # DEC Restore Cursor
    CR  => "\r",                 # Carriage Return
};

##
## Build the pair of progress callbacks, selected by $opt_d{n} and
## $opt_d{f}.  Either one is undef when it has nothing to do.
##
##   $progress_show  : prints the running file count and/or the current
##                     file name to STDERR
##   $progress_reset : erases the counter display (only with $opt_d{n})
##
my($progress_show, $progress_reset) = do {
    my $n;
    # Each ansi_code() call needs its own parentheses.  Written as a bare
    # list operator, the first call takes the second one as an extra
    # argument, and nothing is left to fill $e.
    my($s, $e) = ! $need_color ? ('', '') :
	(ansi_code($colormap{PROGRESS}), ansi_code('Z'));
    # Save cursor, print the colored message, return to column 0 and
    # restore the cursor, so that the next output overwrites it.
    my $print = sub { STDERR->printflush(DSC, $s, @_, $e, CR, DRC) };
    my $start = do {
	if ($opt_d{n} and $opt_d{f}) {
	    sub { $print->(++$n, " ", $current_file, ED) }
	}
	elsif ($opt_d{n}) {
	    sub { $print->(++$n) }
	}
	elsif ($opt_d{f}) {
	    sub { STDERR->printflush($current_file, ":\n") }
	}
	else {
	    undef;
	}
    };
    my $end = do {
	if ($opt_d{n}) {
	    # Nothing to erase until the counter has been shown once.
	    sub { STDERR->printflush(ED) if $n }
	} else {
	    undef;
	}
    };
    ($start, $end);
};

##------------------------------------------------------------
## now ready to run.
##

## record start time
$stat{time_start} = [times] if $opt_d{s};

## run prologue functions, search all files, then run epilogue functions
$_->call() for @opt_prologue;

grep_files();

$_->call() for @opt_epilogue;

## erase what is left of the progress counter
print STDERR ED if $opt_d{n};

dump_uniqcolor() if $opt_uniqcolor and $opt_d{c};

## show statistic info
if ($opt_d{s}) {

    $stat{time_end} = [times];
    my($user_start, $sys_start) = @{$stat{time_start}};
    my($user_end,   $sys_end)   = @{$stat{time_end}};
    printf STDERR "cpu %.3fu %.3fs\n",
	$user_end - $user_start, $sys_end - $sys_start;

    # array values are shown as a comma separated list in parentheses
    local $" = ', ';
    for my $key (sort keys %stat) {
	my $value = $stat{$key};
	my $shown = ref $value eq 'ARRAY' ? "(@$value)" : $value;
	print STDERR "$key: ", $shown, "\n";
    }
}

close_stdout();

## process information, sent to STDERR
if ($opt_d{p}) {
    open(STDOUT, ">&STDERR");
    system("ps -lww -p $$");
    system("lsof -p $$");
}

## without explicit $opt_exit, exit status is 1 if nothing matched
exit($opt_exit // ($stat{match_effective} == 0));

######################################################################

##
## Main loop.  For each file opened on STDIN by open_nextfile(), read
## the whole content through $slurp and hand it to grep_data().
##
## When the read fails (undef content), $opt_error decides what to do:
##   fatal : abort
##   retry : rewind STDIN, read again as raw data, and write this
##           file's output as raw too
##   other : skip the file
##
## Counters in %stat are updated along the way.
##
sub grep_files {
  FILE:
    while (defined($current_file = open_nextfile())) {
	my $content = $slurp->();
	$stat{file_tried}++;

	unless (defined $content) {
	    die "ABORT on $current_file\n" if $opt_error eq 'fatal';

	    if ($opt_error ne 'retry') {
		warn "SKIP $current_file\n" if $opt_warn{skip};
		next FILE;
	    }

	    # Try again
	    binmode(STDIN, ':raw');
	    unless (seek(STDIN, 0, 0)) {
		warn "SKIP $current_file (not seekable)\n"
		    if $opt_warn{skip};
		next FILE;
	    }
	    $content = $slurp->();
	    unless (defined $content) {
		warn "SKIP* $current_file\n" if $opt_warn{skip};
		next FILE;
	    }
	    warn "RETRY $current_file\n" if $opt_warn{retry};
	    $stat{read_retry}++;
	    binmode(STDOUT, ':raw');
	}

	my $matched = grep_data(\$content);

	$stat{match_effective} += $matched;
	$stat{file_searched}++;
	$stat{length} += length $content;
    }
    continue {
	# Executed for every file, including the ones skipped by "next".
	close(STDIN); # wait;	# wait for 4.019 or earlier?
	# recover STDIN for opening '-' and some weird command which needs
	# STDIN opened (like unzip)
	recover_stdin();
	binmode(STDOUT, ":encoding($output_code)");
    }
}

##
## Print the usage message taken from the POD, followed by the options
## of every bucket known to $rcloader and the version number, then
## exit with status 2.
##
## Each option is shown with its help text.  When $opt_usage is true,
## or the option has no help text, its definition is shown instead.
## Options whose help text is "ignore" are not listed.
##
sub usage {
    pod2usage(-verbose => 0, -exitval => "NOEXIT");

    for my $bucket ($rcloader->buckets) {
	my $module = $bucket->module;
	print "    $module options:\n";
	for my $name ($bucket->options) {
	    my $help = $opt_usage ? "" : ($bucket->help($name) // "");
	    next if $help eq 'ignore';
	    my @option = $bucket->getopt($name, ALL => 1);
	    printf("        %-20s %s\n", $name,
		   $help || join(' ', shellquote(@option)));
	}
	print "\n";
    }

    print "Version: $version\n";

    exit 2;
}

##
## Open the next search target on STDIN and return its name, or undef
## when nothing is left.
##
## Names are taken from @ARGV first, then from the saved standard input
## when $opt_readlist is set.  A name starting with http:// or https://
## is read through "w3m -dump", "-" leaves STDIN as it is, and anything
## else is opened as a file.  A file which can not be opened is reported
## (unless it is a symbolic link) and skipped.
##
sub open_nextfile {

    ##
    ## --chdir
    ##
    ## When @ARGV is exhausted, move to the next directory and refill
    ## @ARGV from the original file arguments and --glob patterns.
    ##
    while (@ARGV == 0 and @opt_chdir and (@argv_files or @opt_glob)) {
	my $dir = shift @opt_chdir;
	warn "chdir $dir/\n" if $opt_d{d};
	chdir $start_directory or die "$!: $start_directory\n";
	chdir $dir or die "$!: $dir\n";
	push @ARGV, @argv_files, map(glob, @opt_glob);
    }

    my $file;
    while (defined($file = shift(@ARGV)) ||
	   defined($file = $opt_readlist && read_stdin)) {
	$file = decode 'utf8', $file unless utf8::is_utf8 $file;
	$file =~ s/\n+$//;

	if (0) {}
	elsif ($file =~ /^https?:\/\//) {
	    # Forking open: the child's STDOUT becomes our STDIN.
	    my $pid = open(STDIN, '-|');
	    if (not defined $pid) {
		# fork failed; never exec in the parent process
		warn "$file: $!\n";
		next;
	    }
	    if ($pid == 0) {
		# Child.  List form of exec does not go through the shell,
		# so characters like "&" or ";" in the URL stay literal.
		exec('w3m', '-dump', $file) or die "w3m: $!\n";
	    }
	}
	elsif ($file eq '-') {
	    # nothing to do
	}
	else {
	    open(STDIN, '<', $file) or do {
		warn "$file: $!\n" unless -l $file;
		next;
	    };
	}

	if (my @filters = $filter_d->get_filters($file)) {
	    push_input_filter({ &FILELABEL => $file }, @filters);
	}

	if ($file_code eq 'binary') {
	    binmode STDIN, ":raw";
	} else {
	    binmode STDIN, ":encoding($file_code)";
	}

	return $file;
    }
    undef;
}

######################################################################

##
## Search one file's content and print the result.
##
## Takes a reference to the content string and returns the value of
## $grep->matched, or 0 when a --begin function asked to skip the file.
##
sub grep_data {
    # Alias $_ to the referenced content for the rest of this function.
    local *_ = shift;

    ##
    ## --begin
    ##
    ## A function which dies with a message starting with "SKIP" (in any
    ## case) makes this file skipped.  Other errors are thrown again.
    ##
    for my $f (@opt_begin) {
	eval { $f->call(&FILELABEL => $current_file) };
	if (my $msg = $@) {
	    if ($msg =~ /^SKIP/i) {
		warn $@ if $opt_warn{begin};
		return 0;
	    } else {
		die $msg;
	    }
	}
    }

    $progress_show->() if $progress_show;

    my $grep = App::Greple::Grep->new(
	text         => \$_,
	filename     =>  $current_file,
	pattern      =>  $pat_holder,
	regions      =>  $regions,
	border       =>  $border_re,
	after        =>  $opt_A,
	before       =>  $opt_B,
	only         =>  $opt_o,
	all          =>  $opt_all,
	block        => \@opt_block,
	stretch      =>  $opt_stretch,
	must         =>  $count_must,
	need         =>  $count_need,
	countcheck   =>  $count_match_sub,
	allow        =>  $count_allow,
	strict       =>  $opt_strict,
	group_match  =>  $opt_capture_group,
	group_index  =>  $opt_colorindex =~ /G/i ? 1 : 0,
	region_index =>  $opt_regioncolor,
	stat         => \%stat,
	callback     => \@opt_callback,
	alert_size   =>  $opt_alert{size},
	alert_time   =>  $opt_alert{time},
	join_blocks  =>  $opt_join_blocks,
    )->run;
    # Taken before --postgrep functions and -m splicers touch the result.
    my $matched = $grep->matched;

    ## --postgrep
    for my $f (@opt_postgrep) {
	$f->call($grep);
    }
    ## -m
    for my $splice (@splicer) {
	$splice->($grep->result_ref);
    }

    # -l: file name only, with the block count appended when -c is given
    if ($opt_l) {
	if ($matched) {
	    $progress_reset->() if $progress_reset;
	    print $current_file;
	    printf ":%d", scalar $grep->blocks if $opt_c;
	    print "\n";
	}
    }
    # -c: number of results, printed even when it is zero
    elsif ($opt_c) {
	$progress_reset->() if $progress_reset;
	print "$current_file:" if $need_filename;
	print scalar $grep->result, "\n";
    }
    # normal output, only when there is something to show
    elsif (@{$grep->result_ref}) {
	$progress_reset->() if $progress_reset;
	# open output filter
	@opt_of && push_output_filter(
	    { &FILELABEL => $current_file },
	    \*STDOUT, @opt_of);
	output($grep);
	# STDOUT is restored after the filtered output of this file
	@opt_of && recover_stdout;
    }

    ##
    ## --end
    ##
    for my $f (@opt_end) {
	$f->call(&FILELABEL => $current_file);
    }

    # Overwrite every character of the content with NUL.
    s/./\000/gs if $opt_clean;

    $matched;
}

##
## Print the search result held by the Grep object given as the only
## argument.
##
## Each element of $grep->result is an array reference: the first
## element is the block as [start, end], and the rest are matches inside
## the block as [start, end, index, callback].
##
sub output {
    my $grep = shift;
    my $file = $grep->{filename};

    if ($opt_filestyle eq 'once') {
	print $_file->($file), "\n";
    }

    my $need_blockend =
	!$opt_all &&
	$blockend ne '' &&
	($opt_blockend || $opt_p || $opt_A || $opt_B || @opt_block);

    my $line = 1;	# line number of the position in $lastpos
    my $lastpos = 0;	# end position of the previous block
    my @results = $grep->result;
    for my $rix (keys @results) {
	my $is_top    = $rix == 0;
	my $is_bottom = $rix == $#results;

	my($blk, @result) = @{$results[$rix]};
	my($block_start, $block_end) = @$blk;
	my $block = $grep->cut($block_start, $block_end);

	## --print
	## Each function is called with $_ aliased to the block text, and
	## its return value becomes the new text.  Unless $opt_continue is
	## set, the text is printed as it is and normal processing is
	## skipped.
	if (@opt_print) {
	    local *_ = \$block;
	    for my $func (@opt_print) {
		$_ = $func->call(&FILELABEL => $file);
	    }
	    if (not $opt_continue) {
		print $block if defined $block;
		next;
	    }
	}

	# Advance the line number by the newlines between the previous
	# block and this one.
	if ($opt_n) {
	    my $gap = $grep->cut($lastpos, $block_start);
	    $line += $gap =~ tr/\n/\n/;
	}
	$lastpos = $block_end;

	# when --filestyle and/or --linestyle is "separate"
	# (grep counts the labels actually printed; a newline follows if
	# there was any)
	grep { $_ } (
	    do {
		print $_file->($current_file)
		    if $opt_filestyle eq 'separate';
	    },
	    do {
		print $_line->($line)
		    if $opt_n and $opt_linestyle eq 'separate';
	    }
	) and print "\n";

	# Replace the index of each match by the one given by $indexer.
	if ($indexer) {
	    $indexer->reset if $indexer->block;
	    for ($indexer->reverse ? reverse @result : @result) {
		$_->[2] = $indexer->index;
	    }
	}

	# Split the block into alternating unmatched and matched parts;
	# the matched part of $result[$i] is $slice[$i * 2 + 1].
	my $template = unpack_template(\@result, $block_start);
	my @slice = unpack($template, $block);

	# Temporary stand-in for a newline joined inside a matched part.
	my $mark = "\001";
	for my $i (keys @result) {
	    my($start, $end, $pi, $callback) = @{$result[$i]};
	    # $b is an alias of the matched part
	    local *b = \$slice[$i * 2 + 1];

	    ## run callback function
	    ## (its return value replaces the matched string)
	    if ($callback) {
		$b = do {
		    if (ref $callback eq 'CODE') {
			$callback->($start, $end, $pi, $b);
		    }
		    elsif (callable($callback)) {
			$callback->call(
			    &FILELABEL => $file,
			    start => $start,
			    end   => $end,
			    index => $pi,
			    match => $b);
		    }
		    else { die }
		};
	    }

	    # Join lines in the matched string, leaving a newline at its
	    # very beginning or end.  With per-line line numbers, $mark is
	    # put instead and converted to $opt_joinby in the numbering
	    # step below.
	    if ($opt_join) {
		if ($opt_n and $opt_linestyle eq 'line') {
		    $b =~ s/(?<!\A)\n(?!\z)/$mark/g;
		} else {
		    $b =~ s/(?<!\A)\n(?!\z)/$opt_joinby/g;
		}
	    }

	    $pi = $uniq_color->index($b) if $opt_uniqcolor;
	    $b = index_color($pi, $b);
	}

	$block = join '', @slice;
	next if $block eq "";

	# Collect a line number label for each line start of the block.
	# A $mark advances the line number without producing a label.
	my @line;
	if ($opt_n) {
	    if ($opt_linestyle eq 'line') {
		my $increment = $block =~ /[\n$mark]/ ? 1 : 0;
		$block =~ s{(?:(?<mark>$mark)|(?<=\n)|\A)(?=.)}{
		    	    push @line, $_line->($line) unless $+{mark};
			    $line += $increment;
			    $+{mark} ? $opt_joinby : '';
			   }gse;
	    } else {
		$line += $block =~ tr/\n/\n/;
	    }
	}

	$block = $_text->($block) if $colormap{TEXT} ne "";

	# Put the collected labels at the beginning of each line.
	if (@line) {
	    $block =~ s/^/shift @line/mge;
	}

	if ($opt_filestyle eq 'line') {
	    my $s = $_file->($file);
	    $block =~ s/^/$s/mg;
	}

	# Frames: top before the first block, middle between blocks and
	# bottom after the last one.
	print "$_top\n" if $is_top && $_top ne '';
	print $block;
	print "\n" if $opt_newline and not $block =~ /\n\z/;
	print "$_blockend\n" if $need_blockend;
	if ($is_bottom) {
	    print "$_bottom\n" if $_bottom ne '';
	} else {
	    print "$_middle\n" if $_middle ne '';
	}
    }
}

##
## Build an unpack() template which splits text using match information.
##
## $matched : array reference of match entries; only the first two
##            elements of each entry (start and end position) are used.
## $offset  : position of the first character of the text to be split.
##
## The template is a sequence of "aN" fields, giving the length of the
## unmatched gap and of the matched part for each entry in turn, and
## closed by "a*" for the text left after the last match.
##
## A match which starts before the current position is clipped to it.
## A match which also ends before it becomes a zero length field; a
## negative length would make the template invalid for unpack().
##
sub unpack_template {
    my($matched, $offset) = @_;
    my @len;
    for (@$matched) {
	my($s, $e) = @$_;
	$s = $offset if $s < $offset;
	$e = $s      if $e < $s;
	push @len, $s - $offset, $e - $s;
	$offset = $e;
    }
    join '', map "a$_", @len, '*';
}

__END__


=head1 INSTALL


=head2 CPANMINUS

    $ cpanm App::Greple


=head1 DESCRIPTION


=head2 MULTIPLE KEYWORDS


=head3 AND

B<greple> can take multiple search patterns with the C<-e> option, but
unlike the L<egrep(1)> command, it will search them in AND context.
For example, the next command prints lines containing all of
C<foo>, C<bar> and C<baz>.

    greple -e foo -e bar -e baz ...

Each word can appear in any order and any place in the string.  So
this command finds all of the following lines.

    foo bar baz
    baz bar foo
    the foo, bar and baz

If you want to use OR syntax, use regular expression.

    greple -e foo -e bar -e baz -e 'yabba|dabba|doo'

This command will print lines that contain all of C<foo>, C<bar> and
C<baz> and one or more of C<yabba>, C<dabba> or C<doo>.

=head3 NOT

Use option C<-v> to specify a keyword which should not be found in the
data record.  The next example shows lines that contain both C<foo>
and C<bar> but none of C<yabba>, C<dabba> or C<doo>.

    greple -e foo -e bar -v yabba -v dabba -v doo
    greple -e foo -e bar -v 'yabba|dabba|doo'

=head3 MAY

When you are focusing on multiple words, there may be words those are
not necessary but would be of interest if there were.

Use option C<--may> or C<-t> (tentative) to specify that kind of
words.  They will be a subject of search, and highlighted if exist,
but are optional.

Next command print all lines including C<foo> and C<bar>, and
highlight C<baz> as well.

    greple -e foo -e bar -t baz

=head3 MUST

Option C<--must> or C<-r> is another way to specify optional keyword.
If required keyword exists, all other positive match keyword becomes
optional.  Next command is equivalent to the above example.

    greple -r foo -r bar -e baz


=head2 LEXICAL EXPRESSION


B<greple> takes the first argument as a search pattern specified by
C<--le> option.  In the C<--le> pattern, you can set multiple keywords
in a single parameter.  Each keyword is separated by spaces, and the
first letter describes its type.

    none  And pattern            : --and  -e
    +     Required pattern       : --must -r
    -     Negative match pattern : --not  -v
    ?     Optional pattern       : --may  -t

Just like internet search engines, you can simply provide C<foo bar
baz> to search lines including all of them.

    greple 'foo bar baz'

Next command show lines which include C<foo>, but does not include
C<bar>, and highlight C<baz> if exists.

    greple 'foo -bar ?baz'


=head2 PHRASE SEARCH


B<greple> searches a given pattern across line boundaries.  This is
especially useful to handle Asian multi-byte text, more specifically
Japanese.  Japanese text can be separated by newline almost any place
in the text.  So the search pattern may spread out onto multiple
lines.

As for the ASCII word list, the space character in the pattern matches
any type of space, including newlines.  The next example will search
for the word sequence of C<foo>, C<bar> and C<baz>, even if they are
spread over lines.

    greple -e 'foo bar baz'

Option C<-e> is necessary because space is taken as a token separator
in the bare or C<--le> pattern.


=head2 FLEXIBLE BLOCKS


Default data block B<greple> search and print is a line.  Using
C<--paragraph> (or C<-p> in short) option, series of text separated by
empty line is taken as a record block.  So next command will print
whole paragraph which contains the word C<foo>, C<bar> and C<baz>.

    greple -p 'foo bar baz'

Block also can be defined by pattern.  Next command treat the data as
a series of 10-line unit.

    greple -n --border='(.*\n){1,10}'

You can also define arbitrary complex blocks by writing script.

    greple --block '&your_original_function' ...


=head2 MATCH AREA CONTROL


Using option C<--inside> and C<--outside>, you can specify the text
area to be matched.  Next commands search only in mail header and body
area respectively.  In these cases, data block is not changed, so
print lines which contains the pattern in the specified area.

    greple --inside '\A(.+\n)+' pattern

    greple --outside '\A(.+\n)+' pattern

Option C<--inside>/C<--outside> can be used repeatedly to enhance the
area to be matched.  There are similar option
C<--include>/C<--exclude>, but they are used to trim down the area.

These four options also take user defined function and any complex
region can be used.


=head2 MODULE AND CUSTOMIZATION


User can define default and original options in F<~/.greplerc>.  Next
example enables colored output always, and define new option using
macro processing.

    option default --color=always

    define :re1 complex-regex-1
    define :re2 complex-regex-2
    define :re3 complex-regex-3
    option --newopt --inside :re1 --exclude :re2 --re :re3

Specific set of function and option interface can be implemented as
module.  Modules are invoked by C<-M> option immediately after command
name.

For example, B<greple> does not have recursive search option, but it
can be implemented by C<--readlist> option which accept target file
list from standard input.  Using B<find> module, it can be written
like this:

    greple -Mfind . -type f -- pattern

Also B<dig> module implements more complex search.  It can be used as
simple as this:

    greple -Mdig pattern --dig .

but this command is finally translated into following option list.

    greple -Mfind . ( -name .git -o -name .svn -o -name RCS ) -prune -o
        -type f ! -name .* ! -name *,v ! -name *~
        ! -iname *.jpg ! -iname *.jpeg ! -iname *.gif ! -iname *.png
        ! -iname *.tar ! -iname *.tbz  ! -iname *.tgz ! -iname *.pdf
        -print -- pattern


=head2 INCLUDED MODULES


This release include some sample modules.  Read document in each
modules for detail.  You can read the document by C<--man> option or
L<perldoc> command.

    greple -Mdig --man

    perldoc App::Greple::dig

When it does not work, use C<perldoc App::Greple::dig>.

=over 7

=item B<colors>

Color variation module.
See L<App::Greple::colors>.

=item B<find>

Module to use L<find(1)> command to help recursive search.
See L<App::Greple::find>.

=item B<dig>

Module for recursive search using B<find> module.  Defines C<--dig>,
C<--git> and C<--git-r> options. See L<App::Greple::dig>.

=item B<pgp>

Module to search B<pgp> files.
See L<App::Greple::pgp>.

=item B<select>

Module to select files.
See L<App::Greple::select>.

=item B<perl>

Sample module to search from perl source files.
See L<App::Greple::perl>.

=back

Other modules are available at CPAN, or git repository
L<https://github.com/kaz-utashiro/>.


=head1 OPTIONS


=head2 PATTERNS


If no positive pattern option is given (i.e. other than C<--not> and
C<--may>), B<greple> takes the first argument as a search pattern
specified by C<--le> option.  All of these patterns can be specified
multiple times.

Command itself is written in Perl, and any kind of Perl style regular
expression can be used in patterns.  See L<perlre(1)> for detail.

Note that multiple line modifier (C<m>) is set when executed, so put
C<(?-m)> at the beginning of regex if you want to explicitly disable
it.

Order of capture group in the pattern is not guaranteed.  Please avoid
to use direct index, and use relative or named capture group instead.
For example, if you want to search repeated characters, use
S<< C<(\w)\g{-1}> >> or S<< C<(?E<lt>cE<gt>\w)\g{c}> >> rather than
S<< C<(\w)\1> >>.

Extended Bracketed Character Classes (C<(?[...])>) can be used without
warnings.  See L<perlrecharclass/"Extended Bracketed Character
Classes">.

=over 7

=item B<-e> I<pattern>, B<--and>=I<pattern>

Specify the positive match pattern.  Next command print lines contains
all of C<foo>, C<bar> and C<baz>.

    greple -e foo -e bar -e baz

=item B<-t> I<pattern>, B<--may>=I<pattern>

Specify the optional (tentative) match pattern.  Next command print
lines contains C<foo> and C<bar>, and highlight C<baz> if exists.

    greple -e foo -e bar -t baz

Since it does not affect the bare pattern argument, you can add the
highlighting word to the end of the command argument as follows.

    greple foo file
    greple foo file -t bar
    greple foo file -t bar -t baz

=item B<-r> I<pattern>, B<--must>=I<pattern>

Specify the required match pattern.  If one or more required pattern
exist, other positive match pattern becomes optional.

    greple -r foo -r bar -e baz

Because C<-t> promotes all other C<-e> patterns to required, the next
command does the same thing.  Mixing C<-r>, C<-e> and C<-t> is not
recommended, though.

    greple -r foo -e bar -t baz

=item B<-v> I<pattern>, B<--not>=I<pattern>

Specify the negative match pattern.  Because it does not affect to the
bare pattern argument, you can narrow down the search result like
this.

    greple foo file
    greple foo file -v bar
    greple foo file -v bar -v baz

=back

In the above pattern options, space characters are treated specially.
They are replaced by the pattern which matches any number of white
spaces including newline.  So the pattern can expand to multiple
lines.  Next commands search the series of word C<foo> C<bar> C<baz>
even if they are separated by newlines.

    greple -e 'foo bar baz'

This is done by converting pattern C<foo bar baz> to
C<foo\s+bar\s+baz>, so that word separator can match one or more white
spaces.

As for Asian wide characters, pattern is cooked as zero or more white
spaces can be allowed between any characters.  So Japanese string
pattern C<日本語> will be converted to C<< 日\s*本\s*語 >>.

If you don't want these conversion, use C<-E> (or C<--re>) option.

=over 4

=item B<-x> I<pattern>, B<--le>=I<pattern>

Treat the pattern string as a collection of tokens separated by
spaces.  Each token is interpreted by the first character.  Token
start with C<-> means B<negative> pattern, C<?> means B<optional>, and
C<+> does B<required>.

The next example prints lines which containing C<foo> and C<yabba>,
and none of C<bar> and C<dabba>, with highlighting C<baz> and C<doo>
if they exist.

    greple --le='foo -bar ?baz yabba -dabba ?doo'

This is the summary of start character for C<--le> option:

    +  Required pattern
    -  Negative match pattern
    ?  Optional pattern
    &  Function call (see next section)

=item B<-x> [B<+?->]B<&>I<function>, B<--le>=[B<+?->]B<&>I<function>

If the pattern start with ampersand (C<&>), it is treated as a
function, and the function is called instead of searching pattern.
Function call interface is same as the one for block/region options.

If you have a definition of I<odd_line> function in your F<.greplerc>,
which is described in this manual later, you can print odd number
lines like this:

    greple -n '&odd_line' file

Required (C<+>), optional (C<?>) and negative (C<->) mark can be used
for function pattern.

B<CALLBACK FUNCTION>: Region list returned by function can have two
extra elements besides start/end position.  Third element is index.
Fourth element is a callback function pointer which will be called to
produce string to be shown in command output.  Callback function is
called with four arguments (start position, end position, index,
matched string) and expected to return replacement string.

=item B<-E> I<pattern>, B<--re>=I<pattern>

Specify regular expression.  No special treatment for space and wide
characters.

=item B<--fe>=I<pattern>

Specify the fixed string pattern, like L<fgrep(1)>.

=item B<-i>, B<--ignore-case>

Ignore case.

=item B<-G>, B<--capture-group>

Normally, B<greple> searches for strings that match the entire
pattern.  Even if it contains capturing groups, they do not affect
the search target.  When this option is given, strings corresponding
to individual capture groups are searched, not the entire pattern.  If
the pattern does not contain any capturing groups, it matches the
entire pattern.

If C<G> is specified in the B<--colorindex> option, a corresponding
capture group number is assigned as an index (0 for entire match).
This will cause the strings corresponding to each capture group to be
displayed in a different color.

=item B<-S>, B<--stretch>

Forget the information about the individual match strings and act as if 
the block containing them matched.

The following command will match an entire line containing all of
C<foo>, C<bar>, and C<baz> in any order.

    greple --stretch 'foo bar baz'

For all matches, the index is set to 0.  If multiple callback
functions are specified, the first match in the block will be
effective.

=item B<--need>=I<n>

=item B<--allow>=I<n>

Option to compromise matching condition.  Option C<--need> specifies
the required match count, and C<--allow> the number of negative
condition to be overlooked.

    greple --need=2 --allow=1 'foo bar baz -yabba -dabba -doo'

Above command prints the line which contains two or more from C<foo>,
C<bar> and C<baz>, and does not include more than one of C<yabba>,
C<dabba> or C<doo>.

Using option C<--need=1>, B<greple> produces same result as B<grep>
command.

    grep   -e foo -e bar -e baz
    greple -e foo -e bar -e baz --need=1

When the count I<n> is negative value, it is subtracted from default
value.

If the option C<--need=0> is specified and no pattern was found,
entire data is printed.  This is true even for required pattern.

=item B<--matchcount>=I<count> B<--mc>=...

=item B<--matchcount>=I<min>,I<max> B<--mc>=...

When option C<--matchcount> is specified, only blocks which have given
match count will be shown.  Minimum and maximum number can be given,
connecting by comma, and they can be omitted.  Next commands print
lines including semicolons; 3 or more, exactly 3, and 3 or less,
respectively.

    greple --matchcount=3, ';' file

    greple --matchcount=3  ';' file

    greple --matchcount=,3 ';' file

In fact, I<min> and I<max> can repeat to represent multiple range.
Missing, negative or zero I<max> means infinite.  Next command find
match count 0 to 10, 20 to 30, and 40-or-greater.

    greple --matchcount=,10,20,30,40

=item B<-f> I<file>[@I<index>], B<--file>=I<file>[@I<index>]

Specifies the file containing the search pattern. If there are
multiple lines in the file, each pattern is combined by an OR context.
So the file:

    A
    B
    C

makes the pattern as C<A|B|C>, or more precisely
C<(?^m:A)|(?^m:B)|(?^m:C)>.

Each of these patterns are evaluated independently only with C<m>
modifier.  So if you enable some flags in a pattern, they are only
valid within itself.

Blank line and the line starting with sharp (#) character is ignored.
Two slashes (//) and following string are taken as a comment and
removed with preceding spaces.  If the character at the end of the
line is a backslash, the backslash is removed and concatenated with
the next line.

Complex pattern can be written on multiple lines as follows.

    (?xxn) \
    ( (?<b>\[) | \@ )   # start with "[" or @             \
    (?<n> [ \d : , ]+)  # sequence of digit, ":", or ","  \
    (?(<b>) \] | )      # closing "]" if start with "["   \
    $                   # EOL

If multiple files are specified, a separate group pattern is generated
for each file.

If the file name is followed by C<@index> string, it is treated as
specified by C<--select> option.  Next commands are all equivalent.

    greple -f pattern_file@2,7:9

    greple -f pattern_file --select 2,7:9

Next C<[index]> style is obsolete and will be deprecated in the
future.

    greple -f pattern_file[2,7:9]

See L<App::Greple::subst> module.

=item B<--select>=I<index>

When you want to choose specific line in the pattern file provided by
C<-f> option, use C<--select> option.  I<index> is number list
separated by comma (,) character and each number is interpreted by
L<Getopt::EX::Numbers> module.  Take a look at the module document for
detail.

Next command use 2nd and 7,8,9th lines in the pattern file.

    greple -f pattern_file --select 2,7:9

=back


=head2 STYLES


=over 7

=item B<-l>

List filename only.

=item B<-c>, B<--count>

Print count of matched block.

=item B<-n>, B<--line-number>

Show line number.

=item B<-h>, B<--no-filename>

Do not display filename.

=item B<-H>

Display filename always.

=item B<-o>, B<--only-matching>

Print matched string only.  Newline character is printed after matched
string if it does not end with newline.  Use C<--no-newline> option if
you don't need extra newline.

=item B<--all>

Print entire file.  This option does not affect search behavior or
block treatment.  Just print all contents.  Can be negated by the
B<--no-all> option.

=item B<-m> I<n>[,I<m>], B<--max-count>=I<n>[,I<m>]

Set the maximum count of blocks to be shown to I<n>.

Actually I<n> and I<m> are simply passed to perl L<splice> function as
I<offset> and I<length>.  Works like this:

    greple -m  10      # get first 10 blocks
    greple -m   0,-10  # get last 10 blocks
    greple -m   0,10   # remove first 10 blocks
    greple -m -10      # remove last 10 blocks
    greple -m  10,10   # remove 10 blocks from 10th (10-19)

This option does not affect to search performance and command exit
status.

Note that B<grep> command also has same option, but its behavior is
different when invoked to multiple files.  B<greple> produces given
number of output for each file, while B<grep> takes it as a total
number of output.

=item B<-m> I<*>, B<--max-count>=I<*>

In fact, I<n> and I<m> can repeat as many as possible.  Next example
removes first 10 blocks (by C<0,10>), then get first 10 blocks from
the result (by C<10>).  Consequently, get 10 blocks from 10th (10-19).

    greple -m 0,10,10

Next command get first 20 (by C<20,>) and get last 10 (by C<,-10>),
producing same result.  Empty string behaves like absence for
I<length> and zero for I<offset>.

    greple -m 20,,,-10

=item B<-A>[I<n>], B<--after-context>[=I<n>]

=item B<-B>[I<n>], B<--before-context>[=I<n>]

=item B<-C>[I<n>], B<--context>[=I<n>]

Print I<n>-blocks before/after matched string.  The value I<n> can be
omitted and the default is 2.  When used with C<--paragraph> or
C<--block> option, I<n> means number of paragraph or block.

Actually, these options expand the area of logical operation.  It
means

    greple -C1 'foo bar baz'

matches following text.

    foo
    bar
    baz

Moreover

    greple -C1 'foo baz'

also matches this text, because matching blocks around C<foo> and
C<baz> overlap each other and make a single block.

=item B<--join>

=item B<--joinby>=I<string>

Convert newline character found in matched string to empty or specified
I<string>.  Using C<--join> with C<-o> (only-matching) option, you can
collect searching sentence list in one per line form.  This is
sometimes useful for Japanese text processing.  For example, next
command prints the list of KATAKANA words, including those spread
across multiple lines.

    greple -ho --join '\p{InKatakana}+(\n\p{InKatakana}+)*'

Space separated word sequence can be processed with C<--joinby>
option.  Next example prints all C<for *something*> pattern in pod
documents within Perl script.

    greple -Mperl --pod -ioe '\bfor \w+' --joinby ' '

=item B<--[no]newline>

Since B<greple> can handle arbitrary blocks other than normal text
lines, they sometimes do not end with newline character.  Option C<-o>
makes similar situation.  In that case, extra newline is appended at
the end of block to be shown.  Option C<--no-newline> disables this
behavior.

=item B<--filestyle>=[C<line>,C<once>,C<separate>], B<--fs>

Default style is I<line>, and B<greple> prints filename at the
beginning of each line.  Style I<once> prints the filename only once
at the first time.  Style I<separate> prints filename in the separate
line before each line or block.

=item B<--linestyle>=[C<line>,C<separate>], B<--ls>

Default style is I<line>, and B<greple> prints line numbers at the
beginning of each line.  Style I<separate> prints line number in the
separate line before each line or block.

=item B<--separate>

Shortcut for C<--filestyle=separate> C<--linestyle=separate>.
This is convenient to use block mode search and visiting each location
from supporting tool, such as Emacs.

=item B<--format> B<LABEL>=I<format>

Define the format string of line number (LINE) and file name (FILE) to
be displayed.  Default is:

    --format LINE='%d:'

    --format FILE='%s:'

Format string is passed to C<sprintf> function.  Tab character can be
expressed as C<\t>.

Next example will show line numbers in five digits with tab space:

    --format LINE='%05d\t'

=item B<--frame-top>=I<string>

=item B<--frame-middle>=I<string>

=item B<--frame-bottom>=I<string>

Print surrounding frames before and after each block.  C<top> frame is
printed at the beginning, C<bottom> frame at the end, C<middle> frame
between blocks.

=back


=head2 FILES


=over 7

=item B<--glob>=I<pattern>

Get files matching the specified pattern and use them as target files.
Using C<--chdir> and C<--glob> makes it easy to use B<greple> for a fixed
common job.

=item B<--chdir>=I<directory>

Change directory before processing files.  When multiple directories
are specified in C<--chdir> option, by using wildcard form or
repeating option, C<--glob> file expansion will be done for every
directory.

    greple --chdir '/usr/man/man?' --glob '*.[0-9]' ...

=item B<--readlist>

Get filenames from standard input.  Read standard input and use each
line as a filename for searching.  You can feed the output from other
command like L<find(1)> for B<greple> with this option.  Next example
searches string from files modified within 7 days:

    find . -mtime -7 -print | greple --readlist pattern

Using B<find> module, this can be done like:

    greple -Mfind . -mtime -7 -- pattern

=back


=head2 COLORS


=over 7

=item B<--color>=[C<auto>,C<always>,C<never>], B<--nocolor>

Use terminal color capability to emphasize the matched text.  Default
is C<auto>: effective when STDOUT is a terminal and option C<-o> is
not given, not otherwise.  Option value C<always> and C<never> will
work as expected.

Option B<--nocolor> is alias for B<--color>=I<never>.

When color output is disabled, ANSI terminal sequence is not produced,
but functional colormap, such as C<--cm sub{...}>, still works.

=item B<--colormap>=I<spec>, B<--cm>=...

Specify color map.  Because this option is mostly implemented by
L<Getopt::EX::Colormap> module, consult its document for detail and
up-to-date specification.

Color specification is combination of single uppercase character
representing basic colors, and (usually brighter) alternative colors in
lowercase:

    R  r   Red
    G  g   Green
    B  b   Blue
    C  c   Cyan
    M  m   Magenta
    Y  y   Yellow
    K  k   Black
    W  w   White

or RGB value and 24 grey levels if using ANSI 256 color terminal:

    (255,255,255)      : 24bit decimal RGB colors
    #000000 .. #FFFFFF : 24bit hex RGB colors
    #000    .. #FFF    : 12bit hex RGB 4096 colors
    000 .. 555         : 6x6x6 RGB 216 colors
    L00 .. L25         : Black (L00), 24 grey levels, White (L25)

=over 4

Beginning # can be omitted in 24bit RGB notation.

When values are all same in 24bit or 12bit RGB, it is converted to 24
grey level, otherwise 6x6x6 216 color.

=back

or color names enclosed by angle bracket:

    <red> <blue> <green> <cyan> <magenta> <yellow>
    <aliceblue> <honeydew> <hotpink> <moccasin>
    <medium_aqua_marine>

with other special effects:

    N    None
    Z  0 Zero (reset)
    D  1 Double strike (boldface)
    P  2 Pale (dark)
    I  3 Italic
    U  4 Underline
    F  5 Flash (blink: slow)
    Q  6 Quick (blink: rapid)
    S  7 Stand out (reverse video)
    H  8 Hide (concealed)
    X  9 Cross out
    E    Erase Line

    ;    No effect
    /    Toggle foreground/background
    ^    Reset to foreground

If the spec includes C</>, left side is considered as foreground color
and right side as background.  If multiple colors are given in same
spec, all indicators are produced in the order of their presence.  As
a result, the last one takes effect.

Effect characters are case insensitive, and can be found anywhere and
in any order in color spec string.  Character C<;> does nothing and
can be used just for readability, like C<SD;K/544>.

Example:

    RGB  6x6x6    12bit      24bit           color name
    ===  =======  =========  =============  ==================
    B    005      #00F       (0,0,255)      <blue>
     /M     /505      /#F0F   /(255,0,255)  /<magenta>
    K/W  000/555  #000/#FFF  000000/FFFFFF  <black>/<white>
    R/G  500/050  #F00/#0F0  FF0000/00FF00  <red>/<green>
    W/w  L03/L20  #333/#ccc  303030/c6c6c6  <dimgrey>/<lightgrey>

Multiple colors can be specified separating by white space or comma,
or by repeating options.  Those colors will be applied for each
pattern keywords.  Next command will show word C<foo> in red, C<bar>
in green and C<baz> in blue.

    greple --colormap='R G B' 'foo bar baz'

    greple --cm R -e foo --cm G -e bar --cm B -e baz

Coloring capability is implemented in L<Getopt::EX::Colormap> module.

=item B<--colormap>=I<field>=I<spec>,...

Another form of colormap option to specify the color for fields:

    FILE      File name
    LINE      Line number
    TEXT      Unmatched normal text
    BLOCKEND  Block end mark
    PROGRESS  Progress status with -dnf option

In current release, C<BLOCKEND> mark is colored with C<E> effect
recently implemented in L<Getopt::EX> module, which allows to fill up
the line with background color.  This effect uses irregular escape
sequence, and you may need to define C<LESSANSIENDCHARS> environment
as "mK" to see the result with L<less> command.

=item B<--colormap>=C<&func>

=item B<--colormap>=C<sub{...}>

You can also set the name or definition of a perl subroutine to be
called to handle matched words.  Target word is passed as variable
C<$_>, and the return value of the subroutine will be displayed.

Next command converts all words in C comment to upper case.

    greple --all '/\*(?s:.*?)\*/' --cm 'sub{uc}'

You can quote matched string instead of coloring (this emulates
deprecated option C<--quote>):

    greple --cm 'sub{"<".$_.">"}' ...

It is possible to use this definition with field names.  Next example
print line numbers in seven digits.

    greple -n --cm 'LINE=sub{s/(\d+)/sprintf("%07d",$1)/e;$_}'

Experimentally, function can be combined with other normal color
specifications.  Also the form C<&func;> can be repeated.

    greple --cm 'BF/544;sub{uc}'

    greple --cm 'R;&func1;&func2;&func3'

When color for 'TEXT' field is specified, whole text including matched
part is passed to the function, exceptionally.  It is not recommended
to use user defined function for 'TEXT' field.

=item B<--colorsub>=C<...>, B<--cs>=C<...>

C<--colorsub> or C<--cs> is a shortcut for subroutine colormap.  It
simply encloses the argument by C<sub{ ... }> expression.  So

    greple --cm 'sub{uc}'

can be written as simple as this.

    greple --cs uc

You can not use this option for labeled color.

=item B<--[no]colorful>

Shortcut for C<--colormap>='C<RD GD BD CD MD YD>' in ANSI 16 colors
mode, and C<--colormap>='C<D/544 D/454 D/445 D/455 D/454 D/554>' and
other combination of 3, 4, 5 for 256 colors mode.  Enabled by default.

When single pattern is specified, first color in colormap is used for
the pattern.  If multiple patterns and multiple colors are specified,
each pattern is colored with corresponding color cyclically.

Option C<--regioncolor>, C<--uniqcolor> and C<--colorindex> change
this behavior.

=item B<--colorindex>=I<spec>, B<--ci>=I<spec>

Specify color index method by combination of spec characters.  B<A>
(ascend) and B<D> (descend) can be mixed with B<B> (block) and/or B<S>
(shuffle) like C<--ci=ABS>.  B<R> (random) can be too but it does not
make sense.  When B<S> is used alone, colormap is shuffled with normal
behavior.

=over 4

=item A (Ascending)

Apply different color sequentially according to the order of
appearance.

=item D (Descending)

Apply different color sequentially according to the reverse order of
appearance.

=item B (Block)

Reset sequential index on every block.

=item S (Shuffle)

Shuffle indexed color.

=item R (Random)

Use random color index every time.

=item G (Group)

Valid only when used with the B<--capture-group> (or B<-G>)
option. Assigns an index number corresponding to each capture group.

=item N (Normal)

Reset to normal behavior.  Because the last option takes effect,
C<--ci=N> can be used to reset the behavior set by previous options.

=back

=item B<--random>

Shortcut for C<--colorindex=R>.

=item B<--ansicolor>=[C<16>,C<256>,C<24bit>]

If set as C<16>, use ANSI 16 colors as a default color set, otherwise
ANSI 256 colors.  When set as C<24bit>, 6 hex digits notation produces
24bit color sequence.  Default is C<256>.

=item B<--[no]256>

Shortcut for C<--ansicolor>=C<256> or C<16>.

=item B<--[no]regioncolor>, B<--[no]rc>

Use different colors for each C<--inside> and C<--outside> region.

Disabled by default, but automatically enabled when only single search
pattern is specified.  Use C<--no-regioncolor> to cancel automatic
action.

=item B<--[no]uniqcolor>, B<--[no]uc>

Use different colors for different string matched.
Disabled by default.

Next example prints all words start by C<color> and display them all
in different colors.

    greple --uniqcolor 'colou?r\w*'

When used with option C<-i>, color is selected still in case-sensitive
fashion.  If you want case-insensitive color selection, use next
C<--uniqsub> option.

=item B<--uniqsub>=I<function>, B<--us>=I<function>

Above option C<--uniqcolor> set same color for same literal string.
Option C<--uniqsub> specify the preprocessor code applied before
comparison.  I<function> get matched string by C<$_> and returns the
result.  For example, next command will choose unique colors for each
word by their length.

    greple --uniqcolor --uniqsub 'sub{length}' '\w+' file

If you want case-insensitive color selection, do like this.

    greple -i pattern --uc --uniqsub 'sub{lc}'

Next command read the output from C<git blame> command and set unique
color for each entire line by their commit ids.

    git blame ... | greple .+ --uc --us='sub{s/\s.*//r}' --face=E-D

=item B<--face>=[+-=]I<effect>

Append, remove or set specified I<effect> for all indexed color specs.
Use C<+> (optional) to append, C<-> to remove, and C<=> to set.
Effect is a single character expressing C<S> (Stand-out), C<U>
(Underline), C<D> (Double-struck), C<F> (Flash) and such.

Next example removes D (double-struck) effect.

    greple --face -D

Multiple effects can be added/removed at once.

    greple --face SF-D

Next example clears all existing color specs.

    greple --face =

=back


=head2 BLOCKS


=over 7

=item B<-p>, B<--paragraph>

Print a paragraph which contains the pattern.  Each paragraph is
delimited by two or more successive newlines by default.  Be aware
that an empty line is not a paragraph delimiter if it contains
space characters.  Example:

    greple -np 'setuid script' /usr/man/catl/perl.l

    greple -pe '^struct sockaddr' /usr/include/sys/socket.h

It changes the unit of context specified by C<-A>, C<-B>, C<-C>
options.  Space gap between paragraphs are also treated as a block
unit.  Thus, option C<-pC2> will print target paragraph along with
previous and next paragraph.  Option C<-pC1> causes consecutive
paragraphs to be output as the same block in an easy-to-read format.

You can create original paragraph pattern by C<--border> option.

=item B<--border>=I<pattern>

Specify record block border pattern.  Pattern match is done in the
context of multiple line mode.

Default block is a single line and use C</^/m> as a pattern.
Paragraph mode uses C</(?:\A|\R)\K\R+/>, which means continuous
newlines at the beginning of text or following another newline (C<\R>
means more general linebreaks including C<\r\n>; consult
L<perlrebackslash> for detail).

Next command treat the data as a series of 10-line unit.

    greple -n --border='(.*\n){1,10}'

Contrary to the next C<--block> option, C<--border> never produce
disjoint records.

If you want to treat entire file as a single block, setting border to
start or end of whole data is an efficient way.  Next commands work
the same.

    greple --border '\A'    # beginning of file
    greple --border '\z'    # end of file

=item B<--block>=I<pattern>

=item B<--block>=I<&sub>

Specify the record block to display.  Default block is a single line.

Empty blocks are ignored.  When blocks are not continuous, the match
occurred outside blocks are ignored.

If multiple block options are given, overlapping blocks are merged
into a single block.

Please be aware that this option is sometimes quite time consuming,
because it finds all blocks before processing.

=item B<--blockend>=I<string>

Change the end mark displayed after C<-pABC> or C<--block> options.
Default value is "--".

=item B<--join-blocks>

Join consecutive blocks together.  Logical operation is done for each
individual blocks, but if the results are back-to-back connected, make
them single block for final output.

=back


=head2 REGIONS


=over 7

=item B<--inside>=I<pattern>

=item B<--outside>=I<pattern>

Option C<--inside> and C<--outside> limit the text area to be matched.
For simple example, if you want to find string C<and> not in the word
C<command>, it can be done like this.

    greple --outside=command and

The block can be larger and expand to multiple lines.  Next command
searches from C source, excluding comment part.

    greple --outside '(?s)/\*.*?\*/'

Next command searches only from POD part of the perl script.

    greple --inside='(?s)^=.*?(^=cut|\Z)'

When multiple B<inside> and B<outside> regions are specified, those
regions are mixed up in union way.

In multiple color environment, and if single keyword is specified,
matches in each C<--inside>/C<--outside> region is printed in different
color.  Forcing this operation with multiple keywords, use
C<--regioncolor> option.

=item B<--inside>=I<&function>

=item B<--outside>=I<&function>

If the pattern name begins by ampersand (&) character, it is treated
as a name of subroutine which returns a list of blocks.  Using this
option, user can use arbitrary function to determine from what part of
the text they want to search.  User defined function can be defined in
F<.greplerc> file or by module option.

=item B<--include>=I<pattern>

=item B<--exclude>=I<pattern>

=item B<--include>=I<&function>

=item B<--exclude>=I<&function>

C<--include>/C<--exclude> option behave exactly same as
C<--inside>/C<--outside> when used alone.

When used in combination, C<--include>/C<--exclude> are mixed in AND
manner, while C<--inside>/C<--outside> are in OR.

Thus, in the next example, first line prints all matches, and second
does none.

    greple --inside PATTERN --outside PATTERN

    greple --include PATTERN --exclude PATTERN

You can make up desired matches using C<--inside>/C<--outside> option,
then remove unnecessary part by C<--include>/C<--exclude>.

=item B<--strict>

Limit the match area strictly.

By default, C<--block>, C<--inside>/C<--outside>,
C<--include>/C<--exclude> option allows partial match within the
specified area.  For instance,

    greple --inside and command

matches pattern C<command> because the part of matched string is
included in specified inside-area.  Partial match fails when option
C<--strict> provided, and longer string never matches within shorter
area.

Interestingly enough, above example

    greple --include PATTERN --exclude PATTERN

produces output, as a matter of fact.  Think of the situation
searching, say, C<' PATTERN '> with this condition.  Matched area
includes surrounding spaces, and satisfies both conditions partially.
This match does not occur when option C<--strict> is given, either.

=back


=head2 CHARACTER CODE


=over 7

=item B<--icode>=I<code>

Target file is assumed to be encoded in utf8 by default.  Use this
option to set specific encoding.  When handling Japanese text, you may
choose from 7bit-jis (jis), euc-jp or shiftjis (sjis).  Multiple code
can be supplied using multiple option or combined code names with
space or comma, then file encoding is guessed from those code sets.
Use encoding name C<guess> for automatic recognition from default code
list which is euc-jp and 7bit-jis.  Following commands are all
equivalent.

    greple --icode=guess ...
    greple --icode=euc-jp,7bit-jis ...
    greple --icode=euc-jp --icode=7bit-jis ...

The default code set is always included in the suspect code list.  If
you have just one code to add to the suspect list, put + mark before
the code name.
Next example does automatic code detection from euc-kr, ascii, utf8
and UTF-16/32.

    greple --icode=+euc-kr ...

If the string "B<binary>" is given as encoding name, no character
encoding is expected and all files are processed as binary data.

=item B<--ocode>=I<code>

Specify output code.  Default is utf8.

=back


=head2 FILTER


=over 7

=item B<--if>=I<filter>, B<--if>=I<EXP>:I<filter>

You can specify filter command which is applied to each file before
search.  If only one filter command is specified, it is applied to all
files.  If filter information include colon, first field will be perl
expression to check the filename saved in variable $_.  If it
succeeds, next filter command is pushed.

    greple --if=rev perg
    greple --if='/\.tar$/:tar tvf -'

If the command doesn't accept standard input as processing data, you
may be able to use special device:

    greple --if='nm /dev/stdin' crypt /usr/lib/lib*

Filters for compressed and gzipped file is set by default unless
C<--noif> option is given.  Default action is like this:

    greple --if='s/\.Z$//:zcat' --if='s/\.g?z$//:gunzip -c'

File with C<.gpg> suffix is filtered by B<gpg> command.  In that case,
pass-phrase is asked for each file.  If you want to input pass-phrase
only once to find from multiple files, use C<-Mpgp> module.

If the filter start with C<&>, perl subroutine is called instead of
external command.  You can define the subroutine in F<.greplerc> or
modules.  B<Greple> simply call the subroutine, so it should be
responsible for process control.

=item B<--noif>

Disable default input filter.  Which means compressed files will not
be decompressed automatically.

=item B<--of>=I<filter>

=item B<--of>=I<&func>

Specify output filter which process the output of B<greple> command.
Filter command can be specified in multiple times, and they are
invoked for each file to be processed.  So next command reset the line
number for each file.

    greple --of 'cat -n' string file1 file2 ...

If the filter start with C<&>, perl subroutine is called instead of
external command.  You can define the subroutine in F<.greplerc> or
modules.

Output filter command is executed only when matched string exists to
avoid invoking many unnecessary processes.  No effect for option
C<-l> and C<-c>.

=item B<--pf>=I<filter>

=item B<--pf>=I<&func>

Similar to C<--of> filter but invoked just once and takes care of
entire output from B<greple> command.

=back


=head2 RUNTIME FUNCTIONS


=over 7

=item B<--begin>=I<function>(I<...>)

=item B<--begin>=I<function>=I<...>

Option C<--begin> specify the function executed at the beginning of
each file processing.  This I<function> have to be called from B<main>
package.  So if you define the function in the module package, use the
full package name or export properly.

If the function dies with a message starting with a word "SKIP"
(C</^SKIP/i>), that file is simply skipped.  So you can control if the
file is to be processed using the file name or content.  To see the
message, use C<--warn begin=1> option.

For example, using next function, only perl related files will be
processed.

    sub is_perl {
        my %arg = @_;
        my $name = delete $arg{&FILELABEL} or die;
        $name =~ /\.(?:pm|pl|PL|pod)$/ or /\A#!.*\bperl/
            or die "skip $name\n";
    }

    1;

    __DATA__

    option default --filestyle=once --format FILE='\n%s:\n'

    autoload -Mdig --dig
    option --perl $<move> --begin &__PACKAGE__::is_perl --dig .

=item B<--end>=I<function>(I<...>)

=item B<--end>=I<function>=I<...>

Option C<--end> is almost same as C<--begin>, except that the function
is called after the file processing.

=item B<--prologue>=I<function>(I<...>)

=item B<--prologue>=I<function>=I<...>

=item B<--epilogue>=I<function>(I<...>)

=item B<--epilogue>=I<function>=I<...>

Option C<--prologue> and C<--epilogue> specify functions called before
and after processing.  During the execution, file is not opened and
therefore, file name is not given to those functions.

=item B<--postgrep>=I<function>(I<...>)

=item B<--postgrep>=I<function>=I<...>

Specify the function called after each search operation.  Function is
called with C<App::Greple::Grep> object which contains all information
about the search.  This interface highly depends on the internal
structure, so use with the utmost caution.

=item B<--callback>=I<function>(I<...>)

Callback function is called before printing every matched pattern with
four labeled parameters: B<start>, B<end>, B<index> and B<match>,
which corresponds to start and end position in the text, pattern
index, and the matched string.  Matched string in the text is replaced
by returned string from the function.

Multiple functions can be specified, and if there are multiple search
patterns, they are applied in order and cyclically.

=item B<-M>I<module>::I<function(...)>

=item B<-M>I<module>::I<function=...>

Function can be given with module option, following module name.  In
this form, the function will be called with module package name.  So
you don't have to export it.  Because it is called only once at the
beginning of command execution, before starting file processing,
C<FILELABEL> parameter is not given exceptionally.

=item B<--print>=I<function>

=item B<--print>=I<sub{...}>

Specify user defined function executed before data print.  Text to be
printed is replaced by the result of the function.  Arbitrary function
can be defined in F<.greplerc> file or module.  Matched data is placed
in variable C<$_>.  Filename is passed by C<&FILELABEL> key, as
described later.

It is possible to use multiple C<--print> options.  In that case,
second function will get the result of the first function.  The
command will print the final result of the last function.

This option and next B<--continue> are no longer recommended
because B<--colormap> and B<--callback> functions are simpler and more
powerful.

=item B<--continue>

When C<--print> option is given, B<greple> will immediately print the
result returned from print function and finish the cycle.  Option
C<--continue> forces to continue normal printing process after print
function called.  So please be sure that all data being consistent.

=back

For these run-time functions, optional argument list can be set in the
form of C<key> or C<key=value>, connected by comma.  These arguments
will be passed to the function in key => value list.  Sole key will
have the value one.  Also processing file name is passed with the key
of C<FILELABEL> constant.  As a result, the option in the next form:

    --begin function(key1,key2=val2)
    --begin function=key1,key2=val2

will be transformed into following function call:

    function(&FILELABEL => "filename", key1 => 1, key2 => "val2")

As described earlier, C<FILELABEL> parameter is not given to the
function specified with module option. So

    -Mmodule::function(key1,key2=val2)
    -Mmodule::function=key1,key2=val2

simply becomes:

    function(key1 => 1, key2 => "val2")

The function can be defined in F<.greplerc> or modules.  Assign the
arguments into hash, then you can access argument list as member of
the hash.  It's safe to delete FILELABEL key if you expect random
parameter is given.  Content of the target file can be accessed by
C<$_>.  Ampersand (C<&>) is required to avoid the hash key is
interpreted as a bare word.

    sub function {
        my %arg = @_;
        my $filename = delete $arg{&FILELABEL};
        $arg{key1};             # 1
        $arg{key2};             # "val2"
        $_;                     # contents
    }


=head2 OTHERS


=over 7

=item B<--usage>[=I<expand>]

B<Greple> prints usage and exits with option C<--usage>, or when no
valid parameter is specified.  In this case, module option is displayed
with help information if available.  If you want to see how they are
expanded, supply something not empty to C<--usage> option, like:

    greple -Mmodule --usage=expand

=item B<--exit>=I<number>

When B<greple> is executed normally, it exits with status 0 or 1
depending on whether something matched or not.  Sometimes we want to
get status 0 even if nothing matched.  This option sets the status code
for normal execution.  It still exits with non-zero status when an
error occurred.

=item B<--man>, B<--doc>

Show manual page.
Display module's manual page when used with C<-M> option.

=item B<--show>, B<--less>

Show module file contents.  Use with C<-M> option.

=item B<--path>

Show module file path.  Use with C<-M> option.

=item B<--norc>

Do not read startup file: F<~/.greplerc>.  This option have to be
placed before any other options including C<-M> module options.
Setting C<GREPLE_NORC> environment have same effect.

=begin comment

=item B<-d> I<flags>

Display informations.  Various kind of debug, diagnostic, monitor
information can be display by giving appropriate flag to -d option.

    c: color information
    d: directory information
    e: eval string
    f: processing file name
    m: misc debug information
    n: number of processing files
    o: option related information
    p: run `ps' command before termination (on Unix)
    s: statistic information
    u: unused options
    v: internal match information

=end comment

=item B<--persist>

Use C<--error=retry>.  Will be deprecated in the future.

=item B<--error>=I<action>

As B<greple> tries to read data as a character string, sometimes fails
to convert them into internal representation, and the file is skipped
without processing by default.  This works fine to skip binary
data. (B<skip>)

Also sometimes encounters code mapping error due to character
encoding.  In this case, reading the file as a binary data helps to
produce meaningful output. (B<retry>)

This option specifies the action when data read error occurred.

=over 4

=item B<skip>

Skip the file.  Default.

=item B<retry>

Retry reading the file as a binary data.

=item B<fatal>

Abort the operation.

=item B<ignore>

Ignore error and continue to read anyway.

=back

You may occasionally want to find text in binary data.  Next command
will work like L<strings(1)> command.

    greple -o --re '(?a)\w{4,}' --error=retry --uc /bin/*

If you want read all files as binary data, use C<--icode=binary>
instead.

=item B<-w>, B<--warn> I<type>=[C<0>,C<1>]

Control runtime message mainly about file operation related to
C<--error> option.  Repeatable.  Value is optional and 1 is assumed
when omitted.  So C<-wall> option is same as C<-wall=1> and enables
all messages, and C<-wall=0> disables all.

Types are:

=over 4

=item B<read>

(Default 0) Errors occurred during file read.  Mainly unicode related
errors when reading binary or ambiguous text file.

=item B<skip>

(Default 1) File skip message.

=item B<retry>

(Default 0) File retry message.

=item B<begin>

(Default 0) When C<--begin> function died with C</^SKIP/i> message,
the file is skipped without any notice.  Enable this to see the dying
message.

=item B<all>

Set same value for all types.

=back

=item B<--alert> [ C<size>=#, C<time>=# ]

Set alert parameter for large file.  B<Greple> scans whole file
content to know line borders, and it takes several seconds or more if
it contains large number of lines.

By default, if the target file contains more than B<512 * 1024
characters> (I<size>), B<2 seconds> timer will start (I<time>).  Alert
message is shown when the timer expired.

To disable this alert, set the size as zero:

    --alert size=0

=item B<-Mdebug>, B<-d>I<x>

Debug option is described in L<App::Greple::debug> module.

=back


=head1 ENVIRONMENT and STARTUP FILE


=over 7

=item B<GREPLEOPTS>

Environment variable GREPLEOPTS is used as a default options.  They
are inserted before command line options.

=item B<GREPLE_NORC>

If set non-empty string, startup file F<~/.greplerc> is not processed.

=item B<DEBUG_GETOPT>

Enable L<Getopt::Long> debug option.

=item B<DEBUG_GETOPTEX>

Enable L<Getopt::EX> debug option.

=item B<NO_COLOR>

If true, all coloring capability with ANSI terminal sequence is
disabled.  See L<https://no-color.org/>.

=back

Before starting execution, B<greple> reads the file named F<.greplerc>
on user's home directory.  Following directives can be used.

=over 7

=item B<option> I<name> string

Argument I<name> of B<option> directive is user defined option name.
The rest are processed by C<shellwords> routine defined in
Text::ParseWords module.  Be aware that this module sometimes requires
escaping backslashes.

Any kind of string can be used for option name but it is not combined
with other options.

    option --fromcode --outside='(?s)\/\*.*?\*\/'
    option --fromcomment --inside='(?s)\/\*.*?\*\/'

If the option named B<default> is defined, it will be used as a
default option.

For the purpose to include following arguments within replaced
strings, two special notations can be used in option definition.
String C<$E<lt>nE<gt>> is replaced by the I<n>th argument after the
substituted option, where I<n> is number start from one.  String
C<$E<lt>shiftE<gt>> is replaced by following command line argument and
the argument is removed from option list.

For example, when

    option --line --le &line=$<shift>

is defined, command

    greple --line 10,20-30,40

will be evaluated as this:

    greple --le &line=10,20-30,40

=item B<expand> I<name> I<string>

Define local option I<name>.  Command B<expand> is almost same as
command B<option> in terms of its function.  However, option defined
by this command is expanded in, and only in, the process of
definition, while option definition is expanded when command arguments
are processed.

This is similar to string macro defined by following B<define>
command.  But macro expansion is done by simple string replacement, so
you have to use B<expand> to define option composed by multiple
arguments.

=item B<define> I<name> string

Define macro.  This is similar to B<option>, but argument is not
processed by I<shellwords> and treated just a simple text, so
meta-characters can be included without escape.  Macro expansion is
done for option definition and other macro definition.  Macro is not
evaluated in command line option.  Use option directive if you want to
use it in command line.

    define (#kana) \p{InKatakana}
    option --kanalist --nocolor -o --join --re '(#kana)+(\n(#kana)+)*'
    help   --kanalist List up Katakana string

=item B<help> I<name>

If B<help> directive is used for same option name, it will be printed
in usage message.  If the help message is C<ignore>, corresponding
line won't show up in the usage.

=item B<builtin> I<spec> I<variable>

Define built-in option which should be processed by option parser.
Arguments are assumed to be L<Getopt::Long> style spec, and
I<variable> is string start with C<$>, C<@> or C<%>.  They will be
replaced by a reference to the object which the string represent.

See B<pgp> module for example.

=item B<autoload> I<module> I<options> ...

Define module which should be loaded automatically when specified
option is found in the command arguments.

For example,

    autoload -Mdig --dig --git

replaces option "C<--dig>" to "C<-Mdig --dig>", so that B<dig> module
is loaded before processing C<--dig> option.

=back

Environment variable substitution is done for string specified by
C<option> and C<define> directives.  Use Perl syntax B<$ENV{NAME}> for
this purpose.  You can use this to make a portable module.

When B<greple> found C<__PERL__> line in F<.greplerc> file, the rest
of the file is evaluated as a Perl program.  You can define your own
subroutines which can be used by C<--inside>/C<--outside>,
C<--include>/C<--exclude>, C<--block> options.

For those subroutines, file content will be provided by global
variable C<$_>.  Expected response from the subroutine is the list of
array references, which is made up by start and end offset pairs.

For example, suppose that the following function is defined in your
F<.greplerc> file.  Start and end offset for each pattern match can be
taken as array element C<$-[0]> and C<$+[0]>.

    __PERL__
    sub odd_line {
        my @list;
        my $i;
        while (/.*\n/g) {
            push(@list, [ $-[0], $+[0] ]) if ++$i % 2;
        }
        @list;
    }

You can use next command to search pattern included in odd number
lines.

    % greple --inside '&odd_line' pattern files...


=head1 MODULE

You can expand the B<greple> command using module.  Module files are
placed at F<App/Greple/> directory in Perl library, and therefore have
B<App::Greple::module> package name.

In the command line, module have to be specified preceding any other
options in the form of B<-M>I<module>.  However, it also can be
specified at the beginning of option expansion.

If the package name is declared properly, the C<__DATA__> section in
the module file will be interpreted in the same way as F<.greplerc>
file content.  So you can declare module-specific options there.
Functions declared in the module can be used from those options, which
makes highly extensible option/programming interaction possible.

Using C<-M> without a module argument will print the list of available
modules.  Option C<--man> will display the module's documentation when
used with the C<-M> option.  Use the C<--show> option to see the
module itself.  Option C<--path> will print the path of the module
file.

See this sample module code.  This sample defines options to search
within pod, comment and other segments of a Perl script.  Those
capabilities can be implemented both as functions and as macros.

    package App::Greple::perl;

    use Exporter 'import';
    our @EXPORT      = qw(pod comment podcomment);
    our %EXPORT_TAGS = ( );
    our @EXPORT_OK   = qw();
    
    use App::Greple::Common;
    use App::Greple::Regions;
    
    my $pod_re = qr{^=\w+(?s:.*?)(?:\Z|^=cut\s*\n)}m;
    my $comment_re = qr{^(?:\h*#.*\n)+}m;
    
    sub pod {
        match_regions(pattern => $pod_re);
    }
    sub comment {
        match_regions(pattern => $comment_re);
    }
    sub podcomment {
        match_regions(pattern => qr/$pod_re|$comment_re/);
    }
    
    1;
    
    __DATA__
    
    define :comment: ^(\s*#.*\n)+
    define :pod: ^=(?s:.*?)(?:\Z|^=cut\s*\n)
    
    #option --pod --inside :pod:
    #option --comment --inside :comment:
    #option --code --outside :pod:|:comment:
    
    option --pod --inside '&pod'
    option --comment --inside '&comment'
    option --code --outside '&podcomment'

You can use the module like this:

    greple -Mperl --pod default greple

    greple -Mperl --colorful --code --comment --pod default greple

If the special subroutines C<initialize()> and C<finalize()> are
defined in the module, they are called at the beginning with a
L<Getopt::EX::Module> object as the first argument.  The second
argument is a reference to C<@ARGV>, and you can modify the actual
C<@ARGV> through it.  See the L<App::Greple::find> module for an
example.

The calling sequence is as follows.  See L<Getopt::EX::Module> for
details.

    1) Call initialize()
    2) Call function given in -Mmod::func() style
    3) Call finalize()

=head1 HISTORY

Most capabilities of B<greple> are derived from the B<mg> command,
which has been developed since the early 1990s by the same author.
Because modern standard B<grep> family commands have come to have
similar capabilities, it is time to clean up the entire functionality,
totally remodel the option interfaces, and change the command
name. (2013.11)


=head1 SEE ALSO

L<grep(1)>, L<perl(1)>

L<App::Greple>, L<https://github.com/kaz-utashiro/greple>

L<Getopt::EX>, L<https://github.com/kaz-utashiro/Getopt-EX>


=head1 AUTHOR

Kazumasa Utashiro


=head1 LICENSE

Copyright 1991-2024 Kazumasa Utashiro

This library is free software; you can redistribute it and/or modify
it under the same terms as Perl itself.

=cut

#  LocalWords:  greple egrep foo baz yabba dabba doo ascii greplerc
#  LocalWords:  regex readlist iname jpg jpeg gif png tbz tgz pdf RGB
#  LocalWords:  perlre fgrep grep perl joinby KATAKANA InKatakana utf
#  LocalWords:  nonewline filestyle linestyle chdir mtime nocolor jis
#  LocalWords:  STDOUT colormap Cyan BLOCKEND LESSANSIENDCHARS setuid
#  LocalWords:  sprintf regioncolor uniqcolor ansicolor nocolorful jp
#  LocalWords:  struct sockaddr blockend icode euc shiftjis sjis zcat
#  LocalWords:  ocode gunzip gpg FILELABEL substr eval misc unicode
#  LocalWords:  GREPLEOPTS shellwords Katakana builtin pgp autoload
#  LocalWords:  ENV App ARGV mg Kazumasa Utashiro github colorindex
#  LocalWords:  matchcount gzipped stdin func CPANMINUS cpanm kana
