#!/usr/bin/perl -n

use strict;
use Getopt::Long;

use vars qw( $value $regex $full $neg $regex_match $VERSION);

# Show the command-line help on the currently selected output handle and
# terminate the program. This sub never returns to its caller.
sub usage {
    # Interpolating here-doc: $0 expands to the name the script was run as.
    my $help_text = <<"EOD";
$0 -match to_match [-value match_against] [-negate] [-full] logfile
    -match      part of log file you want to display. values are:
                    i - ip or host
                    d - date
                    m - request method (e.g POST, GET etc)
                    u - URI requested
                    p - protocol
                    c - response code
                    s - request size
                    r - referer
                    a - user agent
    -value      value to match against (optional).
    -regex      value used as a regular expression
    -full       display full line from log
    -negate     display lines where value doesn't match

Only the first letter of an option is required.
See perldoc $0 for full information.
EOD

    print $help_text;
    exit;
}

# Runs once at compile time, before the implicit -n read loop starts.
# Parses the command-line options and builds $regex, the single pattern that
# is applied to every log line. Exactly one field sub-pattern is given a
# capture group, chosen by the first letter of -match, so a successful match
# leaves the requested field in the first capture. Calls usage() (which
# exits) when the options cannot be parsed or -match is missing/unknown.
BEGIN {
    $VERSION = 0.11;

    # Sample combined-format line the sub-patterns below are written for:
    #127.0.0.1 - - [01/Jul/2003:11:28:09 +1000] "GET / HTTP/1.1" 200 1927 "-" "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.4b) Gecko/20030507"

    # Default, non-capturing sub-pattern for each field of the line.
    my $ip = '\S+';
    my $date = '\[[^\]]*?\]';
    my $req = '"[^"]*?"';
    my $code = '\d+';
    my $size = '\S+';
    # '*?' (not '+?') so that an empty quoted referer ("") is accepted, in
    # line with $req and $ua; otherwise such lines fail to match whatever
    # field was asked for.
    my $referer = '"[^"]*?"';
    my $ua = '"[^"]*?"';

    # Fields that are captured whole: the existing sub-pattern is simply
    # wrapped in parentheses. References avoid building code for string eval.
    my %whole_field = ( i => \$ip, c => \$code, s => \$size );

    my $match_type;
    my $options_ok = GetOptions( 'match=s'   =>  \$match_type,
                                 'value=s'   =>  \$value,
                                 'full'      =>  \$full,
                                 'negate'    =>  \$neg,
                                 'regex'     =>  \$regex_match);

    # GetOptions has already reported the offending option on STDERR; show
    # the help instead of processing the log with half-parsed settings.
    usage() unless $options_ok and defined $match_type;

    if ( $match_type =~ m/^(i|c|s)/ ) {
        my $pattern_ref = $whole_field{$1};
        ${$pattern_ref} = '(' . ${$pattern_ref} . ')';
    } elsif ( $match_type =~ /^r/ ) {
        # Capture the referer, but not the "-" placeholder for "no referer".
        $referer = '"([^\-][^"]+?)"';
    } elsif ( $match_type =~ /^a/ ) {
        $ua = '"([^"]+?)"';
    } elsif ( $match_type =~ /^m/ ) {
        # First word of the request line: the method.
        $req = '"(\S+)[^"]*?"';
    } elsif ( $match_type =~ /^u/ ) {
        # Second word of the request line: the URI.
        $req = '"\S+\s+(\S+)[^"]*?"';
    } elsif ( $match_type =~ /^p/ ) {
        # Third word of the request line: the protocol.
        $req = '"\S+\s+\S+\s+(\S+)"';
    } elsif ( $match_type =~ /^d/ ) {
        # Date without the surrounding square brackets.
        $date = '\[([^\]]*?)\]';
    } else {
        usage();
    }

    # Fields are separated by whitespace; the two '\S+' entries stand for the
    # second and third columns of the line ("- -" in the sample above).
    $regex = '^' . join('\s+', $ip, '\S+', '\S+', $date, $req, $code, $size, $referer, $ua);
    # warn "$regex\n";
}

# Per-line body: perl -n runs this for every input line, with the line in $_.
#
# $regex carries a single capture group for the field chosen with -match.
# The capture is taken from the match's return list instead of reading $1
# afterwards: when the line does not match, $field is undef, whereas $1 could
# still hold the capture left over from an earlier line.
my ($field) = m#$regex#;

my $is_match = 0;

# Test length rather than truth, so a legitimate "0" (a zero response size,
# or -value 0) is not mistaken for "nothing there". An empty -value is still
# treated as "no value given"; that also keeps it out of /$value/, where an
# empty pattern would mean "reuse the last successful pattern".
my $have_field = ( defined $field && length $field );
my $have_value = ( defined $value && length $value );

if ( $have_field ) {
    if ( !$have_value ) {
        # No -value: every line on which the field was found is shown.
        $is_match = 1;
    } else {
        # -regex: case-insensitive pattern match; otherwise exact comparison.
        my $hit = $regex_match ? ( $field =~ /$value/i ) : ( $field eq $value );

        # -negate selects the lines whose field does NOT match.
        $is_match = $neg ? !$hit : $hit;
    }
}

if ( $is_match ) {
    # -full prints the whole log line, otherwise just the extracted field.
    print $full ? $_ : "$field\n";
}

__END__

=head1 NAME 

apache_log_info

=head1 DESCRIPTION

apache_log_info lets you quickly pull simple information like referers or response codes out of Apache log files. It also can perform simple matching on the information it pulls out. It only works for files in the combined log format. 

=head1 USAGE

apache_log_info -match to_match [-value match_against] [-regex] [-negate] [-full] logfile
    -match      part of log file you want to display. values are:
                    i - ip or host
                    d - date
                    m - request method (e.g POST, GET etc)
                    u - URI requested
                    p - protocol
                    c - response code
                    s - request size
                    r - referer
                    a - user agent
    -value      value to match against (optional).
    -regex      value used as a regular expression
    -full       display full line from log
    -negate     display lines where value doesn't match

Only the first letter of an option is required.

=head1 EXAMPLES

List all the response codes in a log file

    apache_log_info -m c /path/to/access.log

and show the complete line from the log file
    
    apache_log_info -m c -f /path/to/access.log

List all the 200 response codes in a log file

    apache_log_info -m c -v 200 /path/to/access.log

List all the non 200 response codes in a log file

    apache_log_info -m c -v 200 -n /path/to/access.log

List all the response codes of 300-399

    apache_log_info -m c -r -v ^3 /path/to/access.log

=head1 CAVEATS

The matching is exact. -v 20 will only match 20. It won't match 200 or 320. Use the -r option if you want partial matching; with -r the value is treated as a case-insensitive regular expression.

=head1 AUTHOR

Struan Donald
<struan@exo.org.uk>
http://www.exo.org.uk/code/

=head1 COPYRIGHT

Copyright (C) 2003 Struan Donald. All rights reserved.

This program is free software; you can redistribute                             
it and/or modify it under the same terms as Perl itself.

=cut
