#!/usr/bin/perl

#
# WebEscorter version 1.1.0
#      for Ajax form client
#

# INCLUDE MODULES
use strict;
use warnings;
use Encode qw( from_to );
#use CGI qw/:-no_xhtml/;


# PREPARE
# Raw query string handed over by the web server.  Expected layout is
# "viewtype,first,second,third" (comma separated, trailing fields optional).
my $querystr = $ENV{QUERY_STRING};

my $file = '/var/log/apache2/access.log';      # path of the Apache access log
print "Content-Type: text/plain; charset=UTF-8\n\n";

# GET QUERYS
# These lexicals are file-scoped on purpose: the log scan further down reads
# $viewtype, $first and $datequery.
my ($viewtype, $first, $second, $third, $datequery);

# Without a query string there is nothing to split; everything stays undef.
if (defined $querystr) {
    ($viewtype, $first, $second, $third) = split(/,/, $querystr);
}

# A missing view type selects the welcome message.
$viewtype = 0 unless defined $viewtype;

# The view type is compared numerically below, so reject anything that is not
# a plain non-negative integer instead of letting it silently compare as 0.
if ($viewtype !~ /\A[0-9]+\z/) {
    invalid_param("viewtype");
}

if ($viewtype == 0) {
    print "Welcome\n";
    exit;
} elsif ($viewtype == 1) {
    # Client tracking mode: $first names the client host.
    invalid_param("first") unless defined $first and length $first;

    # Values containing a digit or a dot are passed through the resolver and
    # normalised to a dotted quad; other values are used as given.
    if ($first =~ /[\d\.]+/) {
        # Element 4 of gethostbyname's list result is the first packed address;
        # it is undef when the lookup fails.
        my $packed_addr = (gethostbyname(substr($first, 0, 64)))[4];
        invalid_param("first")
            unless defined $packed_addr and length($packed_addr) == 4;
        $first = sprintf("%u.%u.%u.%u", unpack("C4", $packed_addr));
    }
} elsif ($viewtype == 2) {
    # Date tracking mode: first = year, second = month number, third = day.
    # All three are anchored so that no stray characters can leak into
    # $datequery.
    invalid_param("first")
        unless defined $first and $first =~ /\A[0-9]{4}\z/;

    invalid_param("second")
        unless defined $second
           and $second =~ /\A[0-9]{1,2}\z/
           and $second >= 1
           and $second <= 12;
    my @month_names = qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec);
    $second = $month_names[$second - 1];

    invalid_param("third")
        unless defined $third
           and $third =~ /\A[0-9]{1,2}\z/
           and $third >= 1
           and $third <= 31;

    # Apache writes the day of month with two digits, so the day is
    # zero-padded; otherwise "5" would also match the 15th and the 25th.
    $datequery = sprintf("%02d/%s/%s", $third, $second, $first);
} elsif ($viewtype == 3) {
    # Search-word mode takes no further parameters.
} else {
    invalid_param("viewtype");
}

#print $q->header;
#print $q->start_html(-title=>'WebEscorter - The proxy log analyzing system',Link({-rel=>'stylesheet',-href=>'./style.css'}));
#print $q->start_html(-title=>'WebEscorter - The proxy log analyzing system');

# ANALYZE CODE
# Scan the access log line by line and print one record (via printlnCsv) for
# every proxy request selected by the current view type:
#   1 = requests whose host equals $first
#   2 = requests whose date contains $datequery (only the time is printed)
#   3 = Google / Yahoo search requests (the decoded search words are printed)
# printlnCsv reads the package globals $url, $date, $host and $method, which
# is why those are declared with "our" below.
if (! -e $file) {
    print 'Log file does not exist';
} elsif (! -r $file) {
    print 'Cannot read log file';
} else {
    # Three-argument open with a lexical handle: the path is never parsed for
    # a mode prefix and the handle cannot clash with another bareword.
    open(my $log, '<', $file) or die("Cannot open log file : ($!)");

    # Percent-decode a raw query-string value and convert it from $enc to
    # UTF-8.  An undefined value yields ''.  The conversion is best effort:
    # an unknown encoding name leaves the decoded bytes untouched.
    my $decode_words = sub {
        my ($raw, $enc) = @_;
        return '' unless defined $raw;
        $raw =~ tr/+/ /;
        $raw =~ s/%([0-9A-Fa-f][0-9A-Fa-f])/pack('H2', $1)/eg;
        if (defined $enc) {
            eval { from_to($raw, $enc, "utf-8") };
        }
        return $raw;
    };

    # SCAN URI LOOP
    require Parse::AccessLogEntry;
    my $p = Parse::AccessLogEntry::new();
    while (my $buff = <$log>) {
        chomp $buff;
        my $Hashref = $p->parse($buff);

        our $url = $Hashref->{file};
        # Lines the parser could not split have no request target.
        next unless defined $url;
        # Drop everything that is not a proxy request (absolute URL).
        next if $url !~ /s?https?:\/\/[-_.!~*'()a-zA-Z0-9;\/\?:\@&=+\$,%#]+/;

        our ($date, $time);
        our $host   = $Hashref->{host};
        our $method = $Hashref->{rtype};
        $time = $Hashref->{time};

        if ($viewtype == 1) {
            #---- client tracking mode ----
            # Exact comparison: the user-supplied value is never used as a
            # pattern, so "1.2.3.4" cannot match "11.2.3.45".
            if (defined $host and $host eq $first) {
                $date = "$Hashref->{date} $Hashref->{time}";
                printlnCsv();
            }
        } elsif ($viewtype == 2) {
            #---- date tracking mode ----
            # Literal substring test; $datequery is not treated as a regex.
            my $logged_date = $Hashref->{date};
            if (defined $logged_date and index($logged_date, $datequery) >= 0) {
                $date = $time;
                printlnCsv();
            }
        } elsif ($viewtype == 3) {
            #---- search word mode ----
            # The parameter patterns are anchored on '?' or '&' so that e.g.
            # "oq=" is not mistaken for "q=", and they also accept a parameter
            # at the very end of the URL.  List assignment leaves the variable
            # undef when the parameter is absent instead of reusing a stale $1.
            if ($url =~ m{http://www\.google\.[.\w]+/search}i) {
                $date = "$Hashref->{date} $Hashref->{time}";
                my ($que) = $url =~ /[?&]qu?=([^&]*)/;
                my ($enc) = $url =~ /[?&]ie=([\w\-]+)/;
                $url = '[GOOGLE] ' . $decode_words->($que, $enc);
                printlnCsv();
            } elsif ($url =~ m{http://[\w]+\.yahoo\.[.\w]+/search}i) {
                $date = "$Hashref->{date} $Hashref->{time}";
                my ($que) = $url =~ /[?&]p=([^&]*)/;
                my ($enc) = $url =~ /[?&]ei=([\w\-]+)/;
                # Without an explicit "ei" the words are taken to be EUC-JP.
                $enc = "euc-jp" unless defined $enc;
                $url = '[YAHOO] ' . $decode_words->($que, $enc);
                printlnCsv();
            }
        }
    }
    close($log);
}

# PRINT THE LOG LINE
# Writes the current log record to STDOUT as one tab-separated line.
# Takes no arguments; the record is read from the package globals
# $main::date, $main::host, $main::url and $main::method.
# For a CONNECT request (method matches /connect/i) a "[METHOD]" line is
# printed first and the URL column becomes "METHOD:[CONNECT]:URL".
sub printlnCsv {
    if ($main::method =~ /connect/i) {
        #-- HTTPS tunnel request --
        print "[$main::method]\n";
        print "$main::date\t$main::host\t$main::method:[CONNECT]:$main::url\n";
    } else {
        print "$main::date\t$main::host\t$main::url\n";
    }
    return;
}

# Error reporting routine.
# Prints "Invalid parameter: NAME" for the parameter name given as the first
# argument and then terminates the script.
sub invalid_param {
    my ($param_name) = @_;
    print "Invalid parameter: $param_name\n";
    exit;
}


