#! /usr/local/bin/perl
# Change the above row to the location of your perl script

# mathsci2bibtex
#
# Reads records in the MathSci export format (from the files named on the
# command line, or from STDIN) and prints BibTeX entries on STDOUT.
# Diagnostics go to STDERR.  A small "key database" file remembers the
# keys and checksums of the entries already produced.

# This is a beta version! Send your comments to ilya@math.mit.edu

# $Id: convert,v 1.1 1993/04/19 06:01:35 ilya Exp ilya $
# $Log: convert,v $
# Revision 1.1  1993/04/19  06:01:35  ilya
# Initial revision
#

# NOTE(review): the script is written in the Perl 4 dialect.  It relies on
# "newgetopt.pl" and on $*=1 (multi-line matching for every regex); recent
# Perl 5 releases no longer accept these -- confirm the target interpreter.

# configure -- set the option defaults, parse the command line and build
# the usage message.
#
# Sets the $opt_* globals and %languages.  Dies with the usage text when -h
# is given or when the script is started without any argument.
sub configure {
    # user modifiable definitions

    $opt_rm='\\operatorname{';  # what to substitute instead of "{\rm" code
    $opt_bf='{\\Bbb ';          # what to substitute instead of "{\bf" code
                                # separately for any letter to make bold
    $opt_scr='{\\cal ';         # what to substitute instead of "{\scr" code
    $opt_germ='\\frac ';        # what to substitute instead of "\germ" code
    $opt_crc32='n';             # use complicated algorithm?
    $opt_key_db='mathsci.dbk';  # name of the database of keys
    $opt_r_key_db='y';          # use existing database of keys?
    $opt_w_key_db='y';          # update existing database of keys?
    # The variable was misspelled "$opt_del_dub", so the default never
    # reached the code, which tests $opt_del_dup.
    $opt_del_dup='y';           # search previous keys for duplicate of the given?
    $opt_exp_name='y';          # take the expanded field for the person name?
    $opt_exp_jrnl='y';          # take the expanded field for the journal name?
    $opt_believe_jrnl_year='y'; # the year in the JN field has a precedence
    $opt_believe_jrnl='translation'; # if 2 JN fields, take the translation
    # $opt_believe_jrnl='original';  # if 2 JN fields, take the original
    $opt_translated_title='n';  # what to do with translation for a generic language
    $opt_transl_lang='Russian';
    $opt_orig_lang='';
    $opt_gener_lang='French:German';

    if (@ARGV && ($ARGV[0])=~/^-./) {
        require "newgetopt.pl";
        &NGetOpt('h','crc32:s','rm=s','scr=s','germ=s','bf=s','key_db=s',
                 'r_key_db:s','w_key_db:s','del_dup:s','exp_name:s',
                 'exp_jrnl:s','believe_jrnl=s','believe_jrnl_year:s',
                 'translated_title:s','transl_lang:s','orig_lang:s',
                 'gener_lang:s');
    }
    else {
        $opt_h='y' unless @ARGV;
    }

    # Normalize the yes/no switches: any value containing "n" or "-"
    # becomes the empty (false) string.
    #print STDERR $opt_crc32,"\n";
    $opt_crc32=~s/^.*[-n].*$//;
    #print STDERR $opt_crc32,"\n";
    $opt_r_key_db=~s/^.*[-n].*$//;
    $opt_w_key_db=~s/^.*[-n].*$//;
    $opt_del_dup=~s/^.*[-n].*$//;
    $opt_exp_name=~s/^.*[-n].*$//;
    $opt_exp_jrnl=~s/^.*[-n].*$//;
    $opt_believe_jrnl_year=~s/^.*[-n].*$//;
    $opt_translated_title=~s/^.*[-n].*$//;

    # %languages: what to do with title and series for a language:
    # translate, original, or generic (for recognition of a word).
    # Later lists override earlier ones.
    foreach (split(":",$opt_transl_lang)) { $languages{$_}='translate'; }
    foreach (split(":",$opt_orig_lang))   { $languages{$_}='original'; }
    foreach (split(":",$opt_gener_lang))  { $languages{$_}='generic'; }
    #print STDERR %languages;
    #%languages=('Russian','translate'
    #           ,'French','generic'
    #           ,'German','generic'
    #           );

    # user modifiable definitions - end

    $Usage = "Converter from the MathSci database format to a BibTeX format.
Usage:
     $0 [-options] [MathSci_file]
options:                        (default)
 -h                     give this message (n)
 -crc32 [yn]            use the real crc32 for dublicate search ($opt_crc32)
 -key_db FILE           name of the database of existing keys ($opt_key_db)
 -r_key_db [yn]         read existing database of keys? ($opt_r_key_db)
 -w_key_db [yn]         write existing database of keys? ($opt_w_key_db)
 -del_dup [yn]          delete duplicate records? ($opt_del_dup)
 -exp_name [yn]         use the expanded name field? ($opt_exp_name)
 -exp_jrnl [yn]         use the expanded journal name field? ($opt_exp_jrnl)
 -believe_jrnl_year [yn]
                        believe the year in the journal field? ($opt_believe_jrnl_year)
 -believe_jrnl translation | original
                        which journal field to believe? ($opt_believe_jrnl)
 -translated_title [yn]
                        which title to use if there are 2 (for generic language)? ($opt_translated_title)
 -transl_lang (:-separated list of Languages)
                        For which languages use the translated titles? ($opt_transl_lang)
 -orig_lang (:-separated list of Languages)
                        For which languages use the original titles? ($opt_orig_lang)
 -gener_lang (:-separated list of Languages)
                        Some useful names of languages for pattern recognition ($opt_gener_lang)
 -rm STRING             the string to change '{\\rm ' to ($opt_rm)
 -bf STRING             the string to change '{\\bf ' to ($opt_bf)
 -scr STRING            the string to change '{\\scr ' to ($opt_scr)
 -germ STRING           the string to change '\\germ ' to ($opt_germ)
To work from STDIN use
     $0 -
or
     $0 --
" ;
    die "$Usage" if defined $opt_h;
}

&configure;

$rn=0; # record counter

# MathSci field code -> internal field name
%codes=('AU','author'
       ,'MR','mr_number'
       ,'ED','editor'
       ,'CT','contributor'
       ,'TI','title'
       ,'NT','note'
       ,'TIC','collection_title'
       ,'SE','series'
       ,'PY','year'
       ,'JN','journal'
       ,'JNT','journal_translated'
       ,'JNO','journal_original'
       ,'RF','reviewed_from'
       ,'PUBL','publisher'
       ,'LA','language'
       ,'SL','summary_language'
       ,'PC','primary_code'
       ,'SC','secondary_code'
       ,'RL','review_length'
       ,'RE','reviewer'
       ,'RN','reviewer_nonpersonal'
       ,'RT','review_type'
       ,'AB','abstract'
       ,'DEM','primary_descriptor'
       ,'DER','secondary_descriptor'
       ,'DT','document_type'
       ,'IS','issn_isbn'
       ,'CO','coden'
       ,'XN','cross_ref_mr_number'
       ,'XP','cross_paper_number'
       ,'AN','accesion_number'
       ,'MRI','mr_issue'
       ,'SF','subfile'
       ,'CMP','CMP_volume_issue');

# what to do with special substitutions:
# internal field name -> name of the subroutine that parses the field.
# Fields without an entry here are ignored (see unknown_field).
%proc=('author','author'
      ,'journal','journal'
      ,'journal_translated','journal'
      ,'journal_original','journal'
      ,'publisher','publisher'
      ,'series','series'
      ,'editor','editor'
      ,'collection_title','collection_title'
      ,'title','title'
      ,'year','general'
      ,'note','general'
      ,'language','general'
      ,'document_type','general'
      );

# begins job
&init_crc32 if $opt_crc32;

# read database of keys: one "key, checksum" pair per line, kept in %cs
$r_key_db="<$opt_key_db";
$w_key_db=">>$opt_key_db";
if ($opt_r_key_db && -f $opt_key_db) {
    open(r_key_db, $r_key_db) || die "cannot open $opt_key_db for read";
    # The loop read "while ()", which never terminates; the handle is
    # restored here.
    while (<r_key_db>) {
        if (($key,$cs)=/^(\w+),\s+(\d+)$/) {
            $cs{$key}=$cs;
            #$kkey=substr($key,0,1);
            #eval "\$cs\$kkey{\$key}=\$cs;";
        }
        else {
            # a malformed line is reported and skipped
            warn "wrong entry $_ in the checksum database";
        }
    }
    close(r_key_db) || die "cannot close $opt_key_db for read";
}
elsif ($opt_r_key_db) {
    print STDERR "there is no checksum database $opt_key_db\n";
}
(open(w_key_db, $w_key_db) || die "cannot open $opt_key_db for write") if $opt_w_key_db;

#select(STDOUT);
$*=1;          # every regex below matches ^ and $ at embedded line ends
$/="|\n\n";    # a record ends with "|" followed by an empty line
while (<>) {
    #s/\r$//g; # DOS files in UNIX - anyway $/ won't work
    $searchhistory=0;
    #if (s/^(\f\nNo\.[^\f]*\f)/\f\f/g) {
    if (s/(\f\nNo\.[^\f]*\f)/\f\f/g) {      # the search history deletion
        $searchhistory=1;
        print STDERR "There was a search history in the record $rn of $ARGV.\n";
    }
    if (s/^SilverPlatter.*$/\f\f/gi) {      # the Silverplatter mark deletion
        $searchhistory=1;
        print STDERR "There was a SilverPlatter mark in the record $rn of $ARGV.\n";
    }
    #if ($searchhistory) {
        #foreach (split(/^\f{2,}/)) {
        foreach (split(/\f+/)) {
            if (/\w/) {&dorecord;}
        }
    #}
    #else {&dorecord;};
}
(close(w_key_db) || die "cannot write to $opt_key_db; disk full?!") if $opt_w_key_db;

# dorecord -- convert the MathSci record held in $_ into one BibTeX entry.
#
# Splits the record into "|"-terminated fields, dispatches every known
# field to its parser (see %codes and %proc), reconciles year, volume,
# organization, language and pages, builds the citation key and prints the
# entry on STDOUT.  Parts that could not be recognized are collected in
# $add_list{'???'} and reported on STDERR.
sub dorecord {
    # $qualifier is set by &journal and read below; it is localized here
    # so that a value left over from a previous record cannot leak in.
    local($code,$lname,%add_list,%item_list,@authors,$key,$incollection,
          $reference_type,$lang,$qualifier);
    local($field,$value);
    ++$rn;
    $incollection=0;
    s/^\f+$//g;     # kill Page breaks
    s/\|\s+$/\|/g;  # just in case...
    foreach (split(/\|$/)) {
        s/^(\n+|\s*MathSci\s+Disc[\-\s\d\/]+)\s*\d+\s+of\s+\d+(\s|\n)*\n+//i; # kill this "7 of 67" stuff
        if ( ( ($code,$lname)=/^([^-:\s]+)\s*([^-:]*)[-:]/ )
             && defined $codes{$code}
             && defined $proc{$codes{$code}}) {
            # The handler works on the field text in $_ and receives the
            # internal field name as its argument.
            eval "&$proc{$codes{$code}}($codes{$code});";
            # report a failing handler instead of swallowing the error
            warn "error in field $code of the record $rn of $ARGV: $@" if $@;
        }
        else {&unknown_field;}
        #print join(':',%item_list), "\n";
    }
    #print join(':',%item_list), ":\n";

    # set year
    if (!$item_list{'year'}) {
        $item_list{'year'}=$add_list{'publisher_year'} if $add_list{'publisher_year'};
        $item_list{'year'}=$add_list{'journal_year'} if $add_list{'journal_year'};
    }
    if ($add_list{'publisher_year'} && $add_list{'publisher_year'} != $item_list{'year'}) {
        $add_list{'???'} .= "; " . $add_list{'publisher_year'};
        #delete $add_list{'publisher_year'};
    }
    if ($add_list{'journal_year'} && $add_list{'journal_year'} != $item_list{'year'}) {
        $add_list{'???'} .= "; " . $add_list{'journal_year'};
    }
    if ($opt_believe_jrnl_year && $add_list{'journal_year'} ) {
        $item_list{'year'}=$add_list{'journal_year'};
    }
    #delete $add_list{'journal_year'};

    # As a last resort take a plausible year from the end of the address.
    # (Logical "||" replaces the bitwise "|" used before.)
    ($tmp)=($item_list{'address'} =~ /(\d+)\s*$/);
    ($item_list{'year'})=($item_list{'address'} =~ /([-\d\/]+)\s*$/)
        unless ($tmp < 1500) || ($tmp>2100) || $item_list{'year'};
    $item_list{'year'} =~ s/;\s*\d{2}(\d{2})/\/\1/g;   # "1985; 1986" -> "1985/86"
    $item_list{'address'} =~ s/,\s+$item_list{'year'}$//
        if $item_list{'address'} && $item_list{'year'};

    # choose between the original title and the translation given in [...]
    $translate_now=$opt_translated_title;
    $translate_now=1 if $languages{"\u\L$item_list{'language'}"} eq 'translate';
    $translate_now=0 if $languages{"\u\L$item_list{'language'}"} eq 'original';
    if ($item_list{'title'} =~ /\.\s+\[(.*)\]\s*$/) {
        $item_list{'title'} = $translate_now ? $1 : $` ;
        $item_list{'language'}='' unless $translate_now;
    }
    if ($item_list{'series'} =~ /\.\s+\[(.*)\]\s*$/) {
        $item_list{'series'} = $translate_now ? $1 : $` ;
        $item_list{'language'}='' unless $translate_now;
    }

    # a volume found in the title
    if ($add_list{'volume'}) {
        if ($item_list{'volume'} && $add_list{'volume'}!=$item_list{'volume'}) {
            $item_list{'title'} .= ", Vol. $add_list{'volume'}";
        }
        else {
            $item_list{'volume'}=$add_list{'volume'};
        }
    }

    #foreach $lang (split(/\W+/,$item_list{'language'})) {
    #  $item_list{'organization'} = ''
    #    if "\L$item_list{'organization'}" eq "\L$lang";
    #  print STDERR "if \L$item_list{'organization'}"," eq \L$lang\n";
    #}

    # Drop an "organization" that is really the name of a language.
    # NOTE(review): collection_title stores the alternatives in
    # $add_list{'organization?'} while this code reads
    # $item_list{'organization?'}, so the fallback below never fires as
    # written -- confirm which hash was intended.
    while (1) {
        # Backslash every non-word character so that the organization is
        # matched literally.  (The capture group was missing, so the
        # characters used to be replaced by a bare backslash.)
        ($tmp=$item_list{'organization'})=~s/(\W)/\\\1/g;
        $item_list{'organization'} = ''
            if $tmp ne '' && $item_list{'language'} =~ /$tmp/i;
        $item_list{'organization'} = ''
            if defined $languages{"\u\L$item_list{'organization'}"};
        $item_list{'organization'}=$item_list{'organization?'},
            delete $item_list{'organization?'}, next
            if (!$item_list{'organization'} && $item_list{'organization?'});
        last;
    }
    $add_list{'???'} .= "; $item_list{'organization?'}" if $item_list{'organization?'};

    # with two journal fields keep the language of the one we believe
    $item_list{'language'} = $opt_believe_jrnl eq 'original' ? $` : $'
        if $qualifier && $opt_believe_jrnl && $item_list{'language'} =~ /;/;
    $item_list{'language'} =~ s/\s*;?\s*English\s*;?\s*//;

    # move a trailing "vol. N" from the book title into the volume
    $item_list{'volume'}=$1
        if $item_list{'volume'} eq ''
           && $item_list{'booktitle'} =~ s/,?\s*vol\.\s+([\divxlcdm]+)$//i ;
    $item_list{'booktitle'} =~ s/,?\s*vol\.\s+$item_list{'volume'}$//i ;

    if ($item_list{'author'}) {       # set key basing on authors
        &dokey('author');
    }
    elsif ($item_list{'editor'}) {    # set key basing on editors
        &dokey('editor');
    }
    elsif ($item_list{'title'}) {     # set key basing on title
        &dotitlekey('title');
    }
    else {                            # something strange happened, set random key
        $key=int rand(1048576);
        print STDERR "Cannot construct a key, temporary key $key in the record $rn of $ARGV.\n";
    }
    #print join(':',%item_list), "\n";

    if ($item_list{'journal'}) {
        $reference_type='article';
    }
    elsif ($item_list{'document_type'} =~ /Proceedings-Paper/) { #'Proceedings-Paper'
        $reference_type='inproceedings';
    }
    elsif ($item_list{'document_type'} =~ /Proceedings/) {       #'Proceedings'
        $reference_type='proceedings';
    }
    elsif ($item_list{'document_type'} =~ /Book/) {              #'Book'
        $reference_type='book';
    }
    elsif ($item_list{'series'} ) {   #'Book'???? In fact I don't know...
        $reference_type='book';
    }
    elsif ($incollection) {$reference_type='inproceedings';}
    else {
        # "@" is escaped: unescaped, Perl 5 interpolates the array @misc.
        print STDERR "unknown type of reference, \@misc substituted in the record $rn of $ARGV.\n";
        $reference_type='misc';
    }

    $item_list{'pages'}=$add_list{'book_pages'}      #, delete $add_list{'book_pages'}
        unless $item_list{'pages'};
    $item_list{'pages'}=$add_list{'publisher_pages'} #, delete $add_list{'publisher_pages'}
        unless $item_list{'pages'};                  #publisher_pages
    #undef $item_list{'document_type'};

    # ship out
    &latexize('title');
    &latexize('booktitle');
    $add_list{'???'} =~ s/^;\s+//;
    print STDERR "In $key in the record $rn of $ARGV: Cannot recognize: ",
                 $add_list{'???'}, "\n" if $add_list{'???'};

    # &cs registers the key (adding a suffix when needed) and returns 0
    # for a record already present in the key database.
    if ($opt_del_dup) {
        return if &cs == 0;
    }

    # The "@" is kept out of the interpolated part: "@$reference_type{"
    # would be read as a hash slice by Perl 5.
    (print "\n\@" . $reference_type . "{\t$key\n")
        || die "cannot write to STDOUT; disk full?!";
    while (($field,$value)=each(%item_list)) {
        (print ",\t$field\t={$value}\n" )
            || die "cannot write to STDOUT; disk full?!"
            if $value && ($field ne 'document_type');
    }
    (print "}\n") || die "cannot write to STDOUT; disk full?!";

    # &cs wrote "key, " to the key database; the checksum completes that
    # line.  It is written only when &cs ran, otherwise the database would
    # collect lines without a key.
    ((print w_key_db "$cs{$key}\n") || die "cannot write to $opt_key_db; disk full?!")
        if $opt_w_key_db && $opt_del_dup;
    # divided in 2 parts for possible error in output writing, the second
    # part is written after the work is finished
    #undef %item_list;
}

# author -- parse the field in $_ as a list of names; store it as 'author'.
sub author {
    $item_list{'author'} = &name ;
}

# editor -- parse the field in $_ as a list of names; store it as 'editor'.
sub editor {
    $item_list{'editor'} = &name ;
}

# name -- turn the name list held in $_ into the BibTeX "A and B" form.
# Modifies $_ in place and returns the result.
sub name {
    s/^[^-:]+[-:]\s+//;                 # drop the field code
    s/((-\w\.)?)[.;,]?\s*$/\1/;         # delete the trailing "." if not initial - just
                                        # in case
    s/,-/, /g;
    s/,\s*\([^()]*\)//g;                # the address ", (1-CA)" field
    if (!$opt_exp_name) { s/\[[^][]*\]//g;}      # delete the expanded name [Name Name1] field
    else { s/(^|;)[^[;]*\[([^];]*)\]/$1$2/g;}    # change the name to the expanded name [Name Name1] field
    s/;/ and /g;
    s/\s+/ /g;
    s/^\s+//;
    s/-(\w{1,2}\.)/ $1/g;               # work with initial
    s/\s(\w{1,2}\.)-/ $1 /g;            # work with initial
    # work with prefixes: "van-der-Waerden" -> "van der Waerden"
    substr($_,length($`)+length($1),length($2)) =~ s/-/ /g
        while /(^|\sand\s+)((([a-z]{1,3}|De)-)+)[A-Z]/;
    # "Last, First, Jr" -> "First Last, Jr"
    $_=join(" and ",grep(s/^([^,]+),([^,]+),\s+(Jr|III|IV).?\s*$/\2 \1, \3/||1,
                         split(" and ")))
        if /,\s*(Jr|III|IV)\.?(\s|$)/;
    $_;
}

# journal -- parse a journal field held in $_.
#
# Stores the journal name and, from the comma separated remainder, the
# volume, number, pages and year.  Pieces that fit no known pattern are
# appended to $add_list{'???'}.  Sets the global $qualifier to
# 'translation' or 'original' when the field carries such a prefix.
sub journal {
    $qualifier='';                      # translation and original
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    $qualifier="\L$1" if s/^(translation|original):\s+//i;
    return if ($qualifier && $opt_believe_jrnl
               && $opt_believe_jrnl ne $qualifier && $item_list{'journal'});
    $add_list{'journal_year'}=$item_list{'volume'}=$item_list{'number'}=
        $item_list{'pages'} = undef
        if $item_list{'journal'};       # believe the second instance, hence erase the first
    #$item_list{'journal'}=/^(\S+)(\s*\(\d{1,3}\))?/; # chose the first word before [
    # List assignment: in scalar context the match only yields true/false,
    # so the journal used to become "1" instead of its name.
    ($item_list{'journal'})=/^(.+)\[/;  # chose the text before [
    $item_list{'journal'} =~ s/\s+$//;
    $item_list{'journal'}= $1
        if $opt_exp_jrnl && /\[([^]]*)\]/ ; # change the name to the expanded name [Name ] field
    $item_list{'journal'} =~ s/-/ /g;
    #s/^\s*\S+\s*(\(\d{1,3}\)\s*)?//;   # kill the first word - can be j.-math. (2)
    s/^.*\[[^]]*\]//;                   # kill the expanded name and name
    s/\.\s*$//;
    $wasPage = 0;   # >0 while the previous piece was a page range
    $wasYear = 0;
    foreach (split(/,/)) {
        $wasPage++ if $wasPage>0;
        if (/^\s*([-\d\/]+(\([-\d\/]+\))?)\s+\((\d{4}(\/\d+)*)\)\s*$/) {
            # as: 40/41(141/142) (1990/91)
            if ($add_list{'journal_year'}) {
                if ($add_list{'journal_year'} ne $3) {
                    $add_list{'???'} .= "; " . $3;
                }
                if ($wasPage) {
                    $item_list{'pages'} .= ", $1";
                }
                else {$item_list{'pages'}=$1;}
            }
            else {
                $add_list{'journal_year'} = $3; # there is another year field
                if ($item_list{'volume'}) {
                    if ($wasPage) {
                        $item_list{'pages'} .= ", $1";
                    }
                    elsif ($item_list{'pages'} eq '') {
                        $_=$1;
                        if (/--/) {$item_list{'pages'} = $_ ;}
                        else { $add_list{'???'} .= "; " . $_; }
                    }
                    else {$add_list{'???'} .= "; " . $1;}
                }
                else {$item_list{'volume'} = $1;}
            }
        }
        elsif (/^\s*no\.\s*([-\/\d]+(\s*\([-\/\d]+\))?(\s*exp(\.|\w+)(\s*no\.)?\s*[-\/\d]+)?(\s*[ivxlc]+\.?)?)\s*$/i) {
            #as: no. 3-4(75-76) Exp. No. 708
            #as: no. 6 i.
            if ($item_list{'number'}) {$add_list{'???'} .= "; " . $_;}
            else {$item_list{'number'} = $1;}
        }
        elsif (/^\s*\((\d{4}(\/\d+)*)\)\s*$/) {
            #as: (1956/57)   (the pattern had "\/d+" instead of "\/\d+")
            if ($item_list{'year'}) {
                if ($item_list{'year'} eq $1) {$add_list{'journal_year'} = $1;}
                else {$add_list{'???'} .= "; " . $_;}
            }
            else {$item_list{'year'} = $1;}
        }
        elsif (/^\s*(\d+)\s*$/) {
            #as: 56
            if ($wasPage) {$item_list{'pages'} .= ", $1";$wasPage--;}
            elsif ($item_list{'volume'} ) {
                # is it year?
                if ($1 > 1500 && $1 < 2100) {
                    if ($item_list{'year'} && $item_list{'year'} ne $1) {$add_list{'???'} .= "; " . $_;}
                    else {$item_list{'year'} = $1;}
                }
                else {
                    if ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
                    else {$item_list{'pages'} = $1;}
                }
            }
            else {$item_list{'volume'} = $1;}
        }
        elsif (/^\s*(pp\.\s*)?(([ivxlcd]+\+)?([-\d]+)|((\w)\d+-+\6\d+))(\s*pp\.?)?\s*$/) {
            #as: 567--897 or L567--L897 or viii+65 pp.
            if ($wasPage) {$item_list{'pages'} .= ", $2";$wasPage--;}
            elsif ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
            else {
                $item_list{'pages'} = $2;
                $wasPage = 1;
            }
        }
        elsif (/^\s*No\.\s+([-\d\/]+(\s*\([-\/\d]+\))?)\s+(pp\.\s*)?([-\d]+)(\s*pp\.?)?\s*$/i) {
            #as: No. 62 567--897 pp
            if ($item_list{'number'}) {$add_list{'???'} .= "; " . $_;}
            else {$item_list{'number'} = $1;}
            if ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
            else {
                $item_list{'pages'} = $4;
                $wasPage = 1;
            }
        }
        elsif (/^\s*No\.\s+([-\d\/]+(\s*\([-\/\d]+\))?)\s+(pp\.\s*)?([-\d]+)(\s*pp\.?)?(\s*\((\d{4})\))\s*$/i) {
            #as: No. 62 567--897 pp. (1989)
            if ($item_list{'number'}) {$add_list{'???'} .= "; " . $_;}
            else {$item_list{'number'} = $1;}
            if ($item_list{'pages'}) {$add_list{'???'} .= "; " . $_;}
            else {
                $item_list{'pages'} = $4;
            }
            if ($item_list{'year'}) {
                if ($item_list{'year'} eq $7) {$add_list{'journal_year'} = $7;}
                else {$add_list{'???'} .= "; " . $7;}
            }
            else {$item_list{'year'} = $7;}
        }
        elsif (/^\s*Numero\s+Hors\s+Serie\s*$/i) {
            $item_list{'number'} = "Hors Serie" unless $item_list{'number'};
        }
        elsif (/^\s*(suppl\.|part)\s*([-\d\/]+)\s*$/i) {
            # suppl. 2
            $item_list{'number'} .= " $2" ;
        }
        elsif (/^\s*((\w+\.?\s+)*sci\.)\s*$/i) {
            # Phys. Sci.
            $item_list{'number'} .= ", " . $1 ;
        }
        elsif (length($_)>9 && /^\s*(\D+\s+[\divxlcd]+)\s*$/i) {
            # Voprosy Kvant. Teor. Polya i Statist. Fiz. 5
            $item_list{'number'} .= ", " . $1 ;
        }
        else {$add_list{'???'} .= "; " . $_;}
        $wasPage=0 if $wasPage>1;
    }
    $item_list{'number'} =~ s/,\s*//;
    #print join(':',%item_list), "\n";
}

# publisher -- parse the publisher field held in $_ into publisher,
# address, publisher_year and publisher_pages.
sub publisher {
    #
    # Dont know how to split into publisher and address
    # Three cases: with "Inc.,", "Ltd." and "Akad. Nauk" recognized so while
    #
    s/^[^-:]+[-:]\s*//;
    s/[.;,]?\s*$//;
    $_=$`, $add_list{'publisher_year'}=$1 if /,\s+(\d{4})[,.;]/; # find year
    $item_list{'publisher'} = $_;
    $_=$' ? $' : ''; #search rest
    ($add_list{'publisher_pages'} = $2) =~ s/\s*pp[.,;]?\s*//g
        if s/(^|[,.:])\s+((((Vol\.\s+[\divxlcdm]+:)?([^.,:]*pp[.,;]?(;\s+)?|pp\.[^.,;]*))|(Vol\.\s+[\divxlcdm]+:)(\sand\s|[\s\divxlcdm+\-])+)+)//i;
    # possible forms xii+356 pp. or : pp. xii+356
    # want also ", Vol. 1: xvi+717 pp.; Vol. 2: xii+458 pp.;
    #            Vol. 3: i--xii and 459--834, "
    # so: allow Vol. OR pp
    # i.e. (^|[,.:])\s+($2), where $2=$pages+,
    # $pages=$Pages | $volumemark $pagesmark+,
    # $Pages=$volumemark? $pagespart
    # $pagespart=$partpp | $pppart
    # $partpp=[^.,:]*pp[.,;]?(;\s+)?
    # $pppart=pp\.[^.,;]*
    # $volumemark=Vol\.\s+[\divxlcdm]+: $pagesmark= and | "digits-+"
    # if (/(^|[,.:])\s+([^.,:]*pp[.,;]|pp\.[^.,;]*)/) {
    #   $add_list{'publisher_pages'} = $2;
    #   $add_list{'publisher_pages'} =~ s/\s*pp[.,;]?\s*//;
    # }
    if ($item_list{'publisher'}=~/,\s+/) {
        $item_list{'publisher'} = $`;
        $item_list{'address'} = $';
        if (($item_list{'publisher'} =~ /^Akad\.\s+Nauk/)
            || $item_list{'address'}=~/^(Inc|Ltd)\.,/) {
            # the first comma belonged to the publisher name
            if ($item_list{'address'}=~/,\s+/) {
                $item_list{'publisher'} .= ", ";
                $item_list{'publisher'} .= $`;
                $item_list{'address'} = $';
            }
        }
    }
}

# title -- store the title held in $_; a trailing "vol. N" is split off
# into $add_list{'volume'}.
sub title {
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    if (s/vol\.\s+([-\/\divxlcdm]+(,\s*[\divxlcdm]+)*)\s*$//i) {
        $add_list{'volume'}=$1;
        s/[.;,]?\s*$//;
    }
    $item_list{'title'} = $_;
}

# collection_title -- parse the title of the collection held in $_ into
# booktitle, organization, volume and book_pages.
sub collection_title {
    $incollection=1;
    s/^[^-:]+[-:](\s+Collection:)?\s*//;
    s/[.;,]?\s*$//;
    # I don't know what "pp. 307--400, 678" means

    # now the problem: I don't know what to do when where is a volume
    # both in collection_title and series, and if organization
    # and volume can be in an arbitrary order.
    # Hence I delete organization now, ignoring the volume,
    # and leave volume to the end, when I know if there is a series
    # volume.
    #
    # Moreover, there can be something like
    # vol. 47 (Russian), pp. 27 (MIT), vol. 14.

    # processing: peel recognizable parts off the end until nothing fits
    while (1) {
        s/[,.;]?\s*$//;
        if (s/\(([^()]*)\)\s*$//) {
            #or language? (MIT) or (French)
            if (!$item_list{'organization'}) {$item_list{'organization'}=$1;}
            else {$add_list{'organization?'}.="; ".$1;}
            next;
        }
        if (s/vol\.\s+([-\/\divxlcdm]+(,\s*[\divxlcdm]+)*)\s*$//i) {
            if (!$item_list{'volume'}) {$item_list{'volume'}=$1;}
            else {$add_list{'???'}.="; ".$1;}
            next;
        }
        if (s/vol\.\s+([-\/\divxlcdm]+(,\s*[\divxlcdm]+)*),\s*(pp\.\s*)?\s([\divxlcdm]+(-+[\divxlcdm]+)?(,\s+[\divxlcdm]+(-+[\divxlcdm]+)?)*)\s*$//i) {
            if (!$item_list{'volume'}) {$item_list{'volume'}=$1;}
            else {$add_list{'???'}.="; ".$1;}
            if ($add_list{'book_pages'}) {
                $add_list{'???'}.="; ".$4;
            }
            else {$add_list{'book_pages'} = $4;}
            next;
        }
        if (!$add_list{'book_pages'}
            && s/(,|pp\.)\s+([\divxlcdm]+(-+[\divxlcdm]+)?(,\s+[\divxlcdm]+(-+[\divxlcdm]+)?)*)\s*$//) {
            $add_list{'book_pages'} = $2;
            next;
        }
        $add_list{'book_pages'} =~ s/,\s*$//;
        last;
    }
    $item_list{'booktitle'} = $_;
}

#sub document_type {
#  s/^[^-:]+[-:]\s+//;
#  s/\.\s*$//;
#  $item_list{'document_type'} = $_;
#  #print join(':',%item_list), "\n";
#  }

# series -- store the series held in $_; a trailing volume and/or number
# is split off.
sub series {
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    $item_list{'volume'} = $1 if s/\s*,\s*([\divxlcdm\-\/]+)\s*$//i;
    $item_list{'number'} = $1 if s/\s*,\s*No\.\s+(\d+)\s*$//i;
    $item_list{'volume'} = $1, $item_list{'number'} = $2
        if s/\s*,\s*([\divxlcdm\-\/]+),?\s*part\s*([\divxlcdm]+)\s*$//i;
    $item_list{'series'} = $_;
    #print join(':',%item_list), "\n";
}

# general -- store the field held in $_ unchanged (apart from the field
# code and the trailing punctuation) under the name given as argument.
sub general {
    s/^[^-:]+[-:]\s+//;
    s/[.;,]?\s*$//;
    $item_list{$_[0]} = $_;
    #print join(':',%item_list), "\n";
}

# unknown_field -- called for fields without a parser; ignores them.
sub unknown_field {
    #print join(':',%item_list), "\n";
    #print STDERR "unknown field $lname encountered\n";
}

#sub reset_list {
#  local(@tmp);
#  while (@tmp=each(%item_list)) {$item_list{$tmp[ $[ ]}='';}
#  #undef %item_list; #memory is tight!
#}

# dokey -- build the citation key in the global $key from a list of names.
#
# Argument: the name of the %item_list entry holding the names
# ('author' or 'editor'), in the "A and B and C" form.
# The key consists of the abbreviated surnames of at most three persons
# (in sorted order), the last two digits of the year and the first three
# letters of the first title word that has at least four letters.
sub dokey {
    local($i);
    @authors=split(/ and /,$item_list{$_[0]});
    @authors=sort @authors;
    $i=0;
    $key='';
    do {
        $authors[$i] =~ s/,.*//;                            # Delete the first names
        #$authors[$i] =~ s/^.*\s(\S+)$/$1/;                 # Delete the titles like von
        $authors[$i] =~ s/^([a-z]{1,3}\s+)+([A-Z])/$2/;     # Delete the titles like von
        $authors[$i] =~ s/\s//g;                            # Delete the spaces
        $key .= &trunk_sound($authors[$i]);
        #print $key;
    } while ($i++<2 && $i <= $#authors);
    $key .= $& if $item_list{'year'} =~ /\d+/;  # year can be 1985; 1986, we choose first
    $key =~ s/\d{2}(\d{2})/\1/;                 # delete 19 from 1980
    $key .= "\u\L$1" if $item_list{'title'} =~ /(\w{3})\w/;
    #undef @authors; #memory is tight!
}

# dotitlekey -- build the citation key in the global $key from a title.
#
# Argument: the name of the %item_list entry holding the title.
# Only the text before the first period is used.  The key consists of the
# abbreviations of its first (at most three) words of four or more
# letters, followed by the last two digits of the year.
sub dotitlekey {
    local($i,$tkey);
    $key=$item_list{$_[0]};
    $key =~ s/\..*//;
    @authors=($key =~ /(\w{4,})/g);
    $i=0;
    $key='';
    do {
        $tkey = &trunk_sound($authors[$i]);
        $key .= "\u\L$tkey";
    } while ($i++<2 && $i <= $#authors);
    $key .= $1 if $item_list{'year'} =~ /(\d+)/; # year can be 1985; 1986, we choose first
    $key =~ s/\d{2}(\d{2})/\1/;                  # delete 19 from 1980
}

# latexize -- prepare an %item_list entry for BibTeX/LaTeX.
#
# Argument: the name of the entry ('title', 'booktitle').
# Outside of math ($...$) every run of capital letters that is preceded by
# another character is wrapped in braces; the math parts are wrapped as
# {$...$}.  Afterwards the \germ, {\scr, {\rm and {\bf codes are replaced
# by the strings given in $opt_germ, $opt_scr, $opt_rm and $opt_bf.
sub latexize {
    local($field)=$_[0];
    return unless $item_list{$field};
    # Capitalize outside of math
    local(@pieces)=split(/\$/," " . $item_list{$field} . " ");
                    #add space to avoid losing the first and last "\$"
    local($i)=0;    # odd while the current piece is outside of math
    $item_list{$field}='';
    foreach (@pieces) {
        s/^\s// unless $i++;
        s/(.)([A-Z]+)/\1{\2}/g if $i % 2 ;  # we assume that capital can never
                                            # follow math immediately
        #$i++ unless /(^|[^\\])(\\\\)*\\$/;
        if (/(^|[^\\])(\\\\)*\\$/) {        # this $ is backslashed
            $item_list{$field} .= "\$$_" ;
            $i++;                           # now the evenness of $i isn't changed
        }
        else {
            if ($i%2) {                     # nonmath!
                $item_list{$field} .= "\$}$_" ;
            }
            else {
                $item_list{$field} .= "{\$$_" ;
            }
        }
    }
    #($item_list{$field}=join("\$",@pieces)) =~ s/(^\s+)|(\s+$)//g;
    # there is an extra '$}' in the beginning
    $item_list{$field} =~ s/(^\$\}\s*)|(\s+$)//g;
    $item_list{$field} =~ s/\\germ\s+/$opt_germ/g;
    $item_list{$field} =~ s/\{\\scr\s+/$opt_scr/g;
    $item_list{$field} =~ s/\{\\rm\s+/$opt_rm/g;
    # Every letter of a bold group gets its own $opt_bf group.  The
    # replacement is an expression (/e), so the capture must be written $1:
    # there "\1" is a reference to the constant 1 in Perl 5.
    $item_list{$field} =~ s/\{\\bf\s+([^\}]+)\}/$opt_bf.join("}$opt_bf",split("",$1)).'}'/ge;
}

# cs -- compute the checksum of the current record and register its key.
#
# The checksum is the sum of the checksums of all %item_list values (with
# runs of white space collapsed).  When the key is already known with the
# same checksum the record is a repetition: 0 is returned.  When the key
# is known with another checksum a suffix (A, B, ...) is added to $key.
# Otherwise the key is stored in %cs, "key, " is written to the key
# database (when $opt_w_key_db is set) and the non-zero checksum is
# returned; the caller completes the database line.
sub cs {
    local($cs,$keymod)=(0,'');
    local($text);
    #foreach (('title','booktitle','journal','year','volume','number','editor'))
    foreach (keys(%item_list)) {
        #now the problem is to reach the system independence (like \13\10=\10)
        ($text=$item_list{$_}) =~ s/\s+/ /g;
        $cs += unpack('%32C*',$text) unless $opt_crc32;
        $cs += &crc32($text) if $opt_crc32;
    }
    $cs=1 if $cs==0;    # 0 is reserved for "repeated record"
    while ($cs{$key.$keymod}) {
        #print stderr $key.$keymod,"\n";
        if ($cs{$key.$keymod}==$cs) {
            print STDERR "Repeated reference $key$keymod skipped in the record $rn of $ARGV.\n";
            return 0;
        }
        $keymod ? $keymod++ : ($keymod = "A") ;
    }
    $key .= $keymod;
    (print STDERR "Double reference, now $key in the record $rn of $ARGV.\n") if $keymod;
    $cs{$key}=$cs;
    ((print w_key_db "$key, ") || die "cannot write to $opt_key_db; disk full?!")
        if $opt_w_key_db;
    # divided in 2 parts for possible error in output writing, the second
    # part is written after the work is finished
    $cs;
}

# trunk_sound -- abbreviate a word to its first (at most three) "sounds".
#
# A sound is one of the listed letter groups (sch, th, ee, ...), a doubled
# letter or a single letter.  A doubled letter other than e or o at the
# very end of the result is reduced to a single one.
# Returns the abbreviation; the argument itself is left unchanged.
sub trunk_sound {
    # "ei" is 2 sounds!
    local($word)=$_[0];
    $word =~ /^((schtsch)|(shtsh)|(tsch)|(sch)|(wr)|(ck)|(ch)|(ng)|(zh)|(kh)|(sh)|(kn)|(th)|(ph)|(rh)|(wh)|(qu)|(gh)|(ea)|(ee)|(eu)|(au)|(ou)|(oa)|(oo)|(oe)|(ue)|(ae)|(ie)|([a-zA-Z])\31|[a-zA-Z]){0,3}/i;
    #$word =~ /^((schtsch)|(shtsh)|(tsch)|(sch)|(wr)|(ck)|(ch)|(ng)|(zh)|(kh)|(sh)|(kn)|(th)|(ph)|(rh)|(wh)|(qu)|(gh)|(ea)|(ee)|(eu)|(au)|(ou)|(oa)|(oo)|(oe)|(ue)|(ae)|(ie)|([a-zA-Z])\31+|[a-zA-Z]){0,3}/i;
    ($word=$&) =~ s/([^\Weo])\1$/$1/;
    $word;
}

# crc32 -- CRC-32 of a string (polynomial 0x04c11db7, most significant bit
# first, register preloaded with ones, result complemented).
# &init_crc32 must have been called before.
# All intermediate values are masked to 32 bits, so the result does not
# depend on the width of the perl integers.
sub crc32 {
    local($crc) = 0xffffffff;   # /* preload shift register, per CRC-32 spec */
    foreach (split("",$_[0])) {
        $crc = (($crc << 8) & 0xffffffff)
               ^ $crc32_table[(($crc >> 24) & 0xff) ^ unpack('C',$_)];
    }
    return (~$crc) & 0xffffffff; # /* transmit complement, per CRC-32 spec */
}

#define CRC32_POLY 0x04c11db7 /* AUTODIN II, Ethernet, & FDDI */

# init_crc32 -- fill the global @crc32_table used by &crc32.
sub init_crc32 {
    local($i,$c);
    local($CRC32_POLY)=0x04c11db7 ;
    foreach (0 .. 255) {
        $c=$_<<24;
        foreach (1..8) {        #(c = i << 24, j = 8; j > 0; --j)
            $c = ($c & 0x80000000) ? (($c << 1) ^ $CRC32_POLY) : ($c << 1);
            $c &= 0xffffffff;   # keep 32 bits on perls with wider integers
        }
        $crc32_table[$_] = $c;
    }
}