#include #include #include #include"library.h" #include"parse.h" /* returnwerte: -1: option error -2: file error -3: out of memory */ #define tolower(c) (('A'<=(c))&&((c)<='Z')? (c)^0x20 : (c)) #define islower(c) (('a'<=(c))&&((c)<='z')) int checking = 0, merging = 0, case_sensitive = 0; int first_checking = 1; int newlibcreate = 1; int library_statistics = 0; void print_help(void); void lowercase(unsigned char *string); void write_statistics( library *mainlib ); int main(int argc, const unsigned char *argv[]) { unsigned char *wort; FILE *texfile, *outfile, *infofile, *newlibfile; library *mainlib, *newlib; const unsigned char *newlibname=NULL, *outputname=NULL, *texfilename = NULL, *infofilename = NULL; unsigned char libnamespace[80], infofilenamespace[] = "TeXortho.inf"; printf("This is TeXortho, version 1.0, by J.Hannappel and E.Werner \n"); printf("written with GNU C, 1991\n\n"); if ( argc < 3 ) { print_help(); return(-1); } punctuation_check = 1; new_sentence = 1; /* 1.11.91 */ infofilename = infofilenamespace; outfile = stdout; mainlib = library_create( 30000 ); /********** parsen der command line */ { int i; int inputfilefound = 0; int optionfound = 0 , libfilefound = 0; for ( i = 1; i < argc; i++ ) { if ( argv[i][0] == '-') /* wir haben eine option */ { switch( argv[i][1]) { case 'h': /* libhist schreiben */ library_statistics = 1; break; case 'i': infofilename = argv[++i]; break; case 'l': /* name des erzeugten libfiles, default *.lib */ newlibname = argv[++i]; break; case 'o': /* name des outfiles, default *.out */ outputname = argv[++i]; break; case 'C': /* case sensitive search */ case_sensitive = 1; break; case 'm': /* merge file with lib */ optionfound = 1; merging = 1; break; case 'c': /* check file for spelling errors */ checking = 1; optionfound = 1; break; case 'p': /* supress check of isolated punctuation chars */ punctuation_check = 0; break; case 'n': /* don't create libfile */ newlibcreate = 0; break; case 'P': /* supress check of capital letters at the beginnings of sentences */ first_checking = 0; break; default: printf("invalid option! aborting...\n" ); return(-1); } } else /* wir haben keine option */ { if (inputfilefound) { FILE *libfile; libfilefound = 1; if(!(libfile = fopen( argv[i],"r"))) { printf("could not open word list %s \n",argv[i]); return(-2); } printf("(%s",argv[i]); if(!library_read( mainlib, libfile )) { printf("could not read word list %s \n",argv[i]); return(-3); } fclose( libfile ); printf(")"); } /* if inputfilefound */ else /* dann isses das texfile */ { texfilename = argv[i]; inputfilefound = 1; if( newlibname == NULL ) { strcpy(libnamespace,argv[i]); newlibname = libnamespace; strcpy(strrchr(newlibname,'.'), ".twl"); } /* if newlibname == NULL */ } } /* else no option */ } /* for i < argc */ if( !optionfound || !inputfilefound) { printf("something's missing; aborting...\n"); return(-1); } } /* lokale klammer */ if( merging && checking ) { printf("-c and -m are incompatible options !\n"); return(-1); } if( !merging ) { if(!( texfile = fopen( texfilename,"r"))) { printf("could not open textfile %s \n",texfilename); return(-2); } printf("(%s",texfilename); if ( outputname ) { if(!( outfile = fopen( outputname,"w"))) { printf("could not open outputfile %s \n",outputname); return(-2); } printf("(%s",outputname); } if(!( infofile = fopen( infofilename,"r"))) { printf("could not open infofile %s \n",infofilename); return(-2); } printf( "(%s", infofilename); init_parser( infofile, texfile, outfile ,texfilename); fclose(infofile); printf(")"); } if( newlibcreate && !merging ) if( NULL == ( newlib = library_create( 30000 ))) { printf("could not create a new word list, probably out of memory...\n"); return( -2 ); } if( merging ) { FILE *newlibfile; printf("merging %s to %s ...\n", texfilename, newlibname ); if (!(newlibfile = fopen( texfilename, "r"))) { printf("could not open word list file %s \n",texfilename); return(-2); } printf("(%s",texfilename); library_read(mainlib, newlibfile ); fclose( newlibfile ); printf(")"); if (!(newlibfile = fopen( newlibname, "w"))) { printf("could not open otput library %s \n",newlibname); return(-2); } printf("(%s",newlibname); printf(" reorganizing...\n"); library_reorganize(mainlib); printf(" writing...\n"); library_write( mainlib, newlibfile ); fclose(newlibfile); printf(")\n"); if ( library_statistics ) write_statistics( mainlib ); library_delete( mainlib ); return(0); } /*************************** nun bis zum TeXfileende ******************/ printf("start parsing...\n"); while ( wort = next_word( &new_sentence )) { if ( first_checking ) if ( new_sentence ) { new_sentence = 0; if ( !check_first_char( wort) ) fprintf( outfile,"%s:%d: capital letter expected after presumed end of sentence\n", texfilename, zeilennummer ); } if( !case_sensitive )lowercase( wort ); if(library_find_entry( mainlib, wort )); else { if (newlibcreate) library_enter_entry( newlib, wort, 1 ); fprintf(outfile, "%s:%d: unknown %s\n", texfilename, zeilennummer, wort); } } if ( library_statistics ) write_statistics( mainlib ); library_delete( mainlib ); if (outfile != stdout) { fclose( outfile ); printf(")\n"); } fclose( texfile ); printf(")\n"); if( newlibcreate ) { if (!(newlibfile = fopen( newlibname, "w"))) { printf("could not open output word list %s \n",newlibname); return(-2); } printf("(%s",newlibname); library_write( newlib, newlibfile ); fclose(newlibfile); printf(")\n"); library_delete( newlib ); } return(0); } /* end of main */ void print_help(void) { printf("possible options:\n"); printf(" -c\tcheck textfile for errors\n"); printf(" -m\tmerge word lists\n"); printf("exactly one of the above options is required\n"); printf(" -i\tnext parameter is the info file name; default: TeXortho.inf\n"); printf(" -l\tnext parameter is the output word list name; default: jobname.twl\n"); printf(" -o\tnext parameter is the output file name; default: jobname.out\n"); printf(" -C\tcase sensitive search; be sure your library is made for that!\n"); printf(" -p\tsupress punctuation check\n"); printf(" -P\tsupress check for capital letters at the beginnings of sentences\n"); printf(" -n\tdon't create output word list\n"); printf(" -h\tdo write a library histogram libhist\n"); printf("\nfirst file name is TeX(t) file, the others are word lists\n"); printf("for additional help, check the manual\n"); } void lowercase( unsigned char *string ) { while ( *string = tolower(*string) )string++; } /* lowercase() */ void write_statistics( library *mainlib ) { FILE *newlibfile; if ((newlibfile = fopen( "libhist", "w"))) { long tree_hist[250]; long weight_hist[250]; long deepest_branch; int i; long oldnodes; float treespace; printf("writing library statistics on LIBHIST\n"); for (i=0;i<250;i++) tree_hist[i] = weight_hist[i] = 0; deepest_branch = 0; library_fill_hist( mainlib,tree_hist,weight_hist,249,&deepest_branch ); fprintf(newlibfile,"deepest branch is %d deep!\n",deepest_branch); oldnodes = 0; treespace = 1.0; for ( i = 0; i < (deepest_branch < 249? deepest_branch+1: 250); i++ ) { fprintf(newlibfile,"%2d %10d %10d %10d %10.6f %10.3f\n" ,i,tree_hist[i],weight_hist[i],oldnodes - tree_hist[i], (float)tree_hist[i]/treespace, (float)weight_hist[i]/(float)tree_hist[i]); oldnodes = tree_hist[i] * 2; treespace *= 2.0; } fclose(newlibfile); } else printf("could not open word list statistics file....\n"); }