#! /usr/bin/perl -w

# vim:syntax=perl

use strict;
use lib '/usr/share/perl5';

package Lire::WWW::ExtendedLog;

use vars qw/ @ISA /;

use Lire::DlfSchema;
use Lire::W3CExtendedLog;
use Lire::Program qw( :msg );

# Inherit the generic W3C Extended Log handling; this package only adds
# the conversion of parsed records to the www DLF.
BEGIN {
    @ISA = ( 'Lire::W3CExtendedLog' );
}

# Schema object for the "www" DLF, loaded once when the script starts.
# build_parser() asks it for the function that serialises a record.
my $schema = Lire::DlfSchema::load_schema( 'www' );

# W3C Extended Log field name => www DLF field name, for the fields whose
# value is copied across unchanged.  Fields needing special handling
# (time, client host, requested page) are dealt with in build_parser().
my %w3c_field2dlf = (
    'cs-method'      => 'http_action',
    'cs-version'     => 'http_protocol',
    'sc-status'      => 'http_result',
    'cs(Referer)'    => 'referer',
    'sc-bytes'       => 'requested_page_size',
    'cs(User-Agent)' => 'useragent',
    'cs-username'    => 'who',
);

# Build the record converter for the current field layout.
#
# Calls the parent class' build_parser() with the same arguments, then
# inspects the whitespace-separated field list in $self->{fields} to work
# out which www DLF fields this log can provide.  The resulting converter
# is stored in $self->{www_dlf_converter}; it takes one log line, runs it
# through $self->{w3c_parser} (presumably installed by the parent class --
# confirm in Lire::W3CExtendedLog) and returns whatever the schema's
# hashref-to-DLF function returns for the mapped record.
#
# The value of the last statement (the converter code reference) is also
# the sub's return value, as it always was.
sub build_parser {
    my $self = shift;
    $self->SUPER::build_parser( @_ );

    my @fields = split /\s+/, $self->{fields};
    my %fields = map { $_ => 1 } @fields;

    # W3C fields that are copied verbatim to a DLF field, in log order.
    my @mapped     = grep { exists $w3c_field2dlf{$_} } @fields;
    my @dlf_fields = @w3c_field2dlf{@mapped};

    # DLF fields derived from one of several possible W3C fields.
    push @dlf_fields, "time" if $fields{time};
    push @dlf_fields, "requested_page"
      if $fields{'cs-uri'} || $fields{'cs-uri-stem'};
    push @dlf_fields, "client_host"
      if $fields{'c-ip'} || $fields{'c-dns'};

    # Keep only one of each
    my %dlf_fields = map { $_ => 1 } @dlf_fields;
    @dlf_fields = sort keys %dlf_fields;

    lr_info( "mapped DLF fields: ", join( ", ", @dlf_fields ) );

    # Create the DLF maker function
    my $dlf_maker = $schema->make_hashref2asciidlf_func( @dlf_fields );

    # The converter is stored inside $self.  If it captured $self
    # strongly the object would reference itself through the closure and
    # never be freed, so the closure only holds a weak reference.
    require Scalar::Util;
    my $weak_self = $self;
    Scalar::Util::weaken( $weak_self );

    $self->{www_dlf_converter} = sub {
        # $_[0] is handed over as is (no copy), exactly like before.
        my $w3c = $weak_self->{w3c_parser}->( $_[0] );

        # Those fields that are mapped directly
        my %dlf = ( time => $w3c->{lire_time} );
        foreach my $name ( @mapped ) {
            $dlf{ $w3c_field2dlf{$name} } = $w3c->{$name};
        }

        # Client_host: prefer the DNS name, fall back to the IP address
        # when the name is missing, undefined or the '-' placeholder.
        my $dns = $w3c->{'c-dns'};
        if ( defined $dns && $dns ne '-' ) {
            $dlf{client_host} = $dns;
        } elsif ( exists $w3c->{'c-ip'} ) {
            $dlf{client_host} = $w3c->{'c-ip'};
        }

        # URL: prefer the full URI, fall back to the URI stem under the
        # same conditions as above.
        my $uri = $w3c->{'cs-uri'};
        if ( defined $uri && $uri ne '-' ) {
            $dlf{requested_page} = $uri;
        } elsif ( exists $w3c->{'cs-uri-stem'} ) {
            $dlf{requested_page} = $w3c->{'cs-uri-stem'};
        }

        return $dlf_maker->( \%dlf );
    };
}

# Convert one log record by delegating to the converter that
# build_parser() stored in the object.
#
# Arguments: the object and the record line.  The line is passed on
# straight from @_ (not copied), so the converter sees the caller's own
# scalar.  Returns whatever the converter returns.
sub parse_record {
    my $converter = $_[0]{www_dlf_converter};
    return $converter->( $_[1] );
}

package main;

use Lire::Program qw( :msg :dlf );

my $lines      = 0;    # input lines read
my $dlflines   = 0;    # DLF records written to standard output
my $errorlines = 0;    # input lines whose processing raised an exception

init_dlf_converter( "www" );

my $parser = Lire::WWW::ExtendedLog->new;

# Hand every input line to the parser.  When parse() returns a true
# value it is dereferenced as an array and printed as one
# space-separated DLF record.
while ( defined( my $line = <> ) ) {
    $lines++;

    # The trailing "1" makes the eval's own value the success flag, so a
    # failure is still noticed when $@ ends up empty or gets clobbered.
    my $ok = eval {
        my $dlf = $parser->parse( $line );
        if ( $dlf ) {
            print join( " ", @$dlf ), "\n";
            $dlflines++;
        }
        1;
    };
    unless ( $ok ) {
        lr_warn( $@ || "unknown error while parsing line $lines" );
        $errorlines++;
    }
}

end_dlf_converter( $lines, $dlflines, $errorlines );

exit 0;

__END__

=pod

=head1 NAME

w3c_extended2dlf - convert W3C Extended Log files, as used by MS IIS, to DLF

=head1 SYNOPSIS

B<w3c_extended2dlf> I<file>

=head1 DESCRIPTION

B<w3c_extended2dlf> converts web server log files in the W3C Extended
Log Format to the www DLF. This log format is defined at
http://www.w3.org/TR/WD-logfile.html

It is used by IIS 4.0 and IIS 5.0. This is a customizable format which
contains a header specifying which information is present in the
log.

Some documentation on these log file formats is available in the section on
"Analyzing Log Files (IIS 6.0)" in the "IIS 6.0 Documentation" at the
"Microsoft Windows Server 2003 TechCenter" at
http://www.microsoft.com/technet/prodtechnol/WindowsServer2003/
Library/IIS/610b7d2c-90d6-4e40-be79-aaf88a283f03.mspx?mfr=true .
The newer IIS 7.0 product is documented at http://www.iis.net/.

To get the maximum information in your reports, we suggest that you log
the following fields:

 date, time, c-ip, c-dns, cs-uri, cs-method, sc-bytes, sc-status,
 cs(User-Agent), cs(Referer) and cs-username

We also support the cs-uri-stem field.

Other fields will be ignored.

=head1 LIMITATIONS

The converter doesn't handle aggregation (records with a count field) and
will refuse to process those logs. It also doesn't support changing
the fields in the middle of the log file: it will ignore records that
don't have the same schema as the first one defined.

=head1 EXAMPLES

To process a log as produced in the W3C Extended Log Format:

 $ w3c_extended2dlf < extended.log

w3c_extended2dlf will rarely be used on its own; it is more likely to be
called by lr_log2report:

 $ lr_log2report w3c_extended < /var/log/httpd/extended.log

=head1 AUTHORS

Francis J. Lacoste <flacoste@logreport.org>

=head1 VERSION

$Id: w3c_extended2dlf.in,v 1.17 2009/03/15 08:10:55 vanbaal Exp $

=head1 COPYRIGHT

Copyright (C) 2001, 2002 Stichting LogReport Foundation LogReport@LogReport.org

This program is part of Lire.

Lire is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program (see COPYING); if not, check with
http://www.gnu.org/copyleft/gpl.html.

=cut

# Local Variables:
# mode: cperl
# End:
