#!/usr/local/bin/perl -w

# Exim_help: (try to) extract pertinent information
# from the Exim specification documents.
#
# Author: Hugh Sasse <hgs@dmu.ac.uk>
# Date started: 25-JUN-1999
#
# Inspired by Philip Hazel after a discussion on a
# --help option for Exim
#
# The aim is to base this on the structure of the
# document.
# Pattern to match for chapter boundaries: /^\s+\d+\.\s+\w/
# but this cannot be used in the filter document.
#

# last modified 21-JAN-2000
# to change to 3.12
use strict;		# get Perl to check as much as possible
use Getopt::Long;	# for things like -help etc


# YOU WILL NEED TO BE SURE THESE ARE RIGHT:
# where to find the Exim Specification file:
$main::spec_file = "/home/hgs/exim/exim-3.12/doc/spec.txt";
# where to fine the Filter Specification;
$main::filter_file = "/home/hgs/exim/exim-3.12/doc/filter.txt";
# where to find the FAQ as a text file:
$main::faq_file = "/home/hgs/exim/exim-3.12/doc/FAQ.txt";



# Variables related to command line options:
@main::patterns = ();	# list of patterns to search for
$main::all_patterns = 0;		# all the patterns must match
$main::book_order = 0;	# show line numbers
$main::numbers = 0;	# show line numbers
$main::filter_only = 0;	# only look in filter spec
$main::fixed = 0;	# Used fixed strings, not regexps
$main::match_order = 1;	# best matches first
$main::spec_only = 0;	# only look in exim spec
$main::faq_only = 0;	# only look in exim faq
$main::help = 0;	# If we want help.
$main::html = 0;	# If we want output in HTML.
$main::ignorecase = 0;	# whether to ignore case
$main::version = 0;	# whether to ignore case

if($#ARGV == -1)
{
   &help_msg();
}

# We don't want case to be ignored in Getopt:
&Getopt::Long::config("no_ignorecase"); # Thanks Philip!

print "GetOptions failed\n" unless (&GetOptions('e|p|pattern=s@' => \@main::patterns, 
        'a|and|all_patterns|all-patterns!' => \$main::all_patterns,
        'b|book_order|book-order!' => \$main::book_order,
        'f|filter_only|filter-only!' => \$main::filter_only,
        'F|fixed!' => \$main::fixed,
        'h|help!' => \$main::help,
        'H|html|HTML!' => \$main::html,
        'm|match_order|match-order!' => \$main::match_order,
        'n|number!' => \$main::numbers,
        'q|faq_only|faq-only!' => \$main::faq_only,
        's|spec_only|spec-only!' => \$main::spec_only,
        'i|ignorecase|ignore_case|ignore-case!' => \$main::ignorecase,
        'v|version!' => \$main::version
        ) );

if ($main::help)
{
   &help_msg();
}
if ($main::version)
{
   print "exim_help Version 0.5\n";
   print "By Hugh Sasse, with inspiration and help from\n";
   print "Philip Hazel.\n";
   print "This is probably still an alpha release, because I.\n";
   print "have had little feedback on it :-).\n";
   exit(0);
}

# any other keywords are patterns
push(@main::patterns, @ARGV);

if ($main::fixed)
{
    @main::patterns = grep(s/([^\w\s])/\\$1/g, @main::patterns);
}

# print STDERR "Filter Only is $main::filter_only\n";
# print STDERR "Spec Only is $main::spec_only\n";

# print STDERR "Book Order is $main::book_order\n";
# print STDERR "Match Order is $main::match_order\n";

# If both book- and match-order searches are (not) wanted
# then since match is the default, book must have priority.
if ($main::match_order == $main::book_order)
{
    $main::match_order = not($main::book_order);
}

# print STDERR "Book Order is $main::book_order\n";
# print STDERR "Match Order is $main::match_order\n";

# we wish to display matching paragraphs, so
# information has some context.

{
    package paragraph;

    sub new
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = {};
        if (@_)
        {
            $self->{"start"} = shift;
            $self->{"end"} = shift;
        }
        else
        {
            $self->{"start"} = 0;
            $self->{"end"} = 0;
        }
        $self->{"parent"} = shift;
        $self->{"previous"} = shift;  # where the previous paragraph is if any

        $self->{"score"} = 0;
        $self->{"words"} = 0;	# how many words in the paragraph
        bless($self, $class);
        return($self);
    }

    sub start
    {
        my $self = shift;
        if (@_)
        {
            $self->{"start"} = shift;
        }
        return($self->{"start"});
    }

    sub end
    {
        my $self = shift;
        if (@_)
        {
            $self->{"end"} = shift;
        }
        return($self->{"end"});
    }

    # get/set words in paragraph
    sub words
    {
        my $self = shift;
        if (@_)
        {
            $self->{"words"} = shift;
        }
        return($self->{"words"});
    }

    sub score
    {
        my $self = shift;
        if (@_)
        {
            $self->{"score"} = shift;
        }
        return($self->{"score"});
    }

    sub add_to_score
    {
        my $self = shift;
        if (@_)
        {
            $self->{"score"} += shift;
        }
        return($self->{"score"});
    }

    sub contains_line
    {
        my $self = shift;
        my $line = shift;
        # We are not interested in the blank
	# lines that delimit the paragraphs, only
	# the text.
        return( ($self->{"start"} < $line) &&
                ($self->{"end"} > $line) );
    }

    sub display_results
    {
        my $self = shift;
	my $look_back = 1;
        if (@_)
        {
	    $look_back = shift;
        }
        my $line = 0;
        my $pattern = "NO PATTERN";
        my $line_text = "<NOTHING>";
        my $parent = $self->{"parent"};

        # A paragraph describing an option may match, but 
	# the option would not normally be displayed.  This
	# code is to fix this.

        # Previous would hold the parameter type and default value
	my $previous = $self->{"previous"};

        # print "\$self = $self\n";
        # print "\$parent = $parent\n";
        # print "\$previous = $previous\n";

        if ($look_back)
        {
	    if (ref($previous))
	    {
		if ($previous->{"score"} == 0)
		{
		    # $prev_prev would hold the parameter name only...
		    my $prev_prev = $previous->{"previous"};
		    if (ref($prev_prev))
		    {
			# so it would be only one word
			if ($prev_prev->{"score"} == 0)
			{
			    # it would not normally appear.
			    if ($prev_prev->{"words"} == 1)
			    {
				# Then it is the keyword for this paragraph.
				# So display it
				$prev_prev->display_results(0);
				# display the parameter info:
				$previous->display_results(0);
			    }
			}
		    } # end if ref(prev_prev)
		} # end if previous score = 0
	    } # end if ref(previous)
        }  # end if lookback

        # sleep(30);
        print "<PRE>\n" if $main::html;
        foreach $line ($self->{"start"}..$self->{"end"})
        {
            $line_text = ${$$parent->{"text"}}[$line];
	    if ($main::html)
            {
                # This if is factored outside the foreach loop.
                if ($main::ignorecase)
                {
		    foreach $pattern (@main::patterns)
		    {
			if ($$parent->do_match($line, $pattern))  
			{
			    $line_text =~ s|$pattern|<STRONG>$pattern</STRONG>|gi;
			}
		    }
                }
                else
                {
		    foreach $pattern (@main::patterns)
		    {
			if ($$parent->do_match($line, $pattern))  
			{
			    $line_text =~ s|$pattern|<STRONG>$pattern</STRONG>|g;
			}
		    }
                }
            }
            print "$line: " if $main::numbers;
	    print $line_text;
        };
        print "\n";
        print "</PRE>\n" if $main::html;
    }
}




{
    package document;

    sub new 
    {
        my $proto = shift;
        my $class = ref($proto) || $proto;
        my $self = {};
        $self->{"name"} = shift;
        $self->{"file"} = "";
        $self->{"text"} = [];
        $self->{"has_chapters"} = 0;
        $self->{"chapters"} = [];
        $self->{"blanks"} = [];
        $self->{"paragraphs"} = [];
        bless($self, $class);
        return($self);
    }



    sub has_chapters
    {
        my $self = shift;
        if (@_)
        {
	    $self->{"has_chapters"} = shift;
        }
        return($self->{"has_chapters"});
    }



    sub open
    {
        my $self = shift;
        my $file = shift;
        if ($file)
        {
            open(FILE, "<$file") or die "unable to open $file for reading ";
            @{$self->{text}} = <FILE>;
            close(FILE);
            my $lastblank = -1;
            my $line;
            my $score = 0;
            my $line_score = 0;
            my $pattern;
            my $paragraph;
            my $prev_paragraph = 0;
	    my %pattern_counts = ();
            my @words = ();

            foreach $line (0..$#{$self->{"text"}})
            {
                $line_score = 0;
                push(@words, split(/\s+/, ${$self->{"text"}}[$line]));
                @words = grep(!/^$/, @words);
                
                foreach $pattern (@main::patterns)
                {
		    my $matches = 0;
                    $matches = $self->do_match($line, $pattern);
                    $line_score += $matches;
                    $self->{"matches"} += $matches;
                    $pattern_counts{$pattern} = 1 if ($matches);
                }
                $score += ($line_score * $line_score);

                # A blank line with a | on it is intended
                # to be a change bar on its own.
		if (grep(/^\s*(\|)?\s*$/, ${$self->{"text"}}[$line]))
                {
		    push(@{$self->{"blanks"}},$line);
                    if ($line > $lastblank+1)
                    {
			$paragraph = new paragraph($lastblank+1,
                                                   $line-1,
                                                   \$self,
                                                   $prev_paragraph,
                                                   );
			$paragraph->words(scalar @words);
                        @words = ();

                        $prev_paragraph = $paragraph;
                        # paragraph is an object, so it is already a ref.
                        # No backslash as a result.

                        if ($main::all_patterns)
                        {
                            my $pcount = 0;
                            my $fpcount = 0;
                            my $pattern;

                            foreach $pattern (@main::patterns)
                            {
                               $fpcount++ if ($pattern_counts{$pattern});
                               $pcount++;
                            }
			    if ($fpcount == $pcount)
                            {
				$paragraph->score($score);
                            }
                            else
                            {
				$paragraph->score(0);
                            }
                        }
                        else
                        {
			    $paragraph->score($score);
                        }
			push(@{$self->{"paragraphs"}},$paragraph);
                        %pattern_counts = ();
                        $score = 0;
                    }
                }
		push(@{$self->{"chapters"}},$line) if (grep(/^\s+\d+\.\s+\w/, ${$self->{"text"}}[$line]));
		$lastblank = $line if (grep(/^\s*(\|)?\s*$/, ${$self->{text}}[$line]));
            }
        }
        return($self);
    } # end sub open





    sub do_match
    {
        my $self = shift;
        my $line = shift;
        my $pattern = shift;
        my $string = ${$self->{"text"}}[$line];
        my $result = 0;
        if ($main::ignorecase)
        {
            # count the matches (from perlfaq4(1))
            while($string =~ /$pattern/ig) {$result++;}; 
        }
        else
        {
            # count the matches (from perlfaq4(1))
            while($string =~ /$pattern/g) {$result++;}; 
        }
        return($result);
    }



    # In which chapter is this paragraph?
    sub in_chapter
    {
        my $self = shift;
        my $paragraph = shift;
        my $result = 0;
        my $chapter = 0;
        if ($self->has_chapters())
        {
	    foreach $chapter ( @{$self->{"chapters"}})
	    {
		last if ($chapter > $paragraph->{"start"});
		$result = $chapter;
	    }
        }
        return $result;
    }



    sub display_results
    {
        my $self = shift;
        my $paragraph;
        my $chapter = 0;
        my $line;
        my $last_line = 0;
        my $last_chapter = 0;

        print "<P>\n" if $main::html;
        print "In the ", $self->{"name"}," document:\n";
        print "</P>\n" if $main::html;
	if ($self->{"matches"})
	{
            if ($main::match_order)
            {
                @{$self->{"paragraphs"}} = sort
                {
                    if ($a->score() > $b->score())
                    {
                        return(-1);
                    }
                    elsif ($a->score() == $b->score())
                    {
                        return( $a->{"start"} <=> $b->{"start"});
                    }
                    else
                    {
                        return(1);
                    }
                }
                @{$self->{"paragraphs"}};
            }
	    foreach $paragraph (@{$self->{"paragraphs"}})
	    {
		if ($paragraph->score())
		{
                   if ($self->has_chapters())
                   {
		       $chapter = $self->in_chapter($paragraph);
		       if ($last_chapter != $chapter)
		       {
			   print "<P>" if $main::html;
			   print "\nIn chapter:\n";
			   print "</P>\n<PRE>\n" if $main::html;
			   print ${$self->{"text"}}[$chapter];
			   print "\n";
			   print "</PRE>\n" if $main::html;
			   $last_chapter = $chapter;
		       }
		   }
		   $paragraph->display_results(1) unless ($self->has_chapters() && ($chapter == 0));
		}
	    }
	}
	else
	{
	    print "<P>" if $main::html;
	    print "There are no matches\n";
	    print "</P>" if $main::html;
	}
    }

} # end of package document


# Produce the usage text for exim_help...
sub help_msg
{
    print << "END"
exim_help: provide help on exim.
usage exim_help [options] pattern....
where options is 
-a
-and
	display only the paragraphs containing
        every pattern.
-b 
-book_order
        Display the results in the order they
	appear in the docs.

-e pattern
-pattern pattern
	Search for this pattern in the file

-f
-filter_only
	Search the filter spec only.
-h
-help
	provide this help

-H
-html
	produce output in HTML with the
	patterns hightlighted

-i
-ic
-ignorecase
	match pattenrs without regard to case

-[no]f
-[no]filter_only
	search the filter specification only

-[no]s
-[no]spec_only
	search the Exim specification only.

-[no]n
-[no]number
	do [not] show the line numebers.

-m
-match_order
        Display the results with best matching
        paragraphs first

-v
-version
	Print the version information.

END
;
    exit(0);
}


# initialise information from files...
unless ($main::filter_only || $main::faq_only)
{
    $main::exim_spec = new document("Exim Specification");
    $main::exim_spec->has_chapters(1);
    $main::exim_spec->open($main::spec_file);
};

unless ($main::spec_only || $main::faq_only)
{
    $main::filter_spec = new document("Exim Filter Specification");
    $main::filter_spec->has_chapters(0);
    $main::filter_spec->open($main::filter_file);
};

unless ($main::spec_only || $main::filter_only)
{
    $main::faq = new document("Exim FAQ");
    $main::faq->has_chapters(0);
    $main::faq->open($main::faq_file);
};

print "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0//EN\"\n" if $main::html;
print "\"http://www.w3.org/TR/REC-html40/strict.dtd\">\n" if $main::html;
print "<HTML>\n<HEAD>\n" if $main::html;
print "<TITLE>Exim Help Results</TITLE>\n" if $main::html;
print "</HEAD>\n<BODY>\n" if $main::html;

$main::exim_spec->display_results() unless ($main::filter_only || $main::faq_only);
$main::filter_spec->display_results() unless ($main::spec_only || $main::faq_only);
$main::faq->display_results() unless ($main::spec_only || $main::filter_only);

print "</BODY>\n</HTML>\n" if $main::html;
