/* web.h: configuration constants, shared types, global variables and
 *        prototypes of functions defined in parts of the program other than
 *        the main module.
 */

/* This header mentions FILE in several declarations below, so pull in
 * <stdio.h> here instead of relying on every includer to do it first.
 */
#include <stdio.h>

/* Compile-time configuration. */
#define SERVERROOT        "/home/http"  /* directory to put files in */
#define URLPREFIX         ""            /* prefix for re-written URLs */
#define DEFAULT_FILENAME  "index.html"  /* NOTE(review): presumably the name
                                           used when a URL names no file;
                                           confirm against the callers */
#define MAXFOLLOW         100           /* maximum number of '-f' options */
#define WEBCRAWL_VERSION  "1.1"         /* program version string */
/* Prototypes for the functions implemented in 'path.c'.
 *
 * NOTE(review): the implementations are not visible from this header.  The
 * output parameters ('buf', 'name') are plain char pointers with no length
 * argument, so callers must supply buffers that are large enough; confirm
 * the required size in path.c.
 */

void ResolveRelative(const char *path,
                     const char *ref,
                     char *buf);

void GetDirectory(const char *path,
                  char *buf);

FILE *CreateFile(const char *path,
                 const char *host,
                 int port,
                 const char *filename,
                 int bOverwrite);

int file_exists(const char *path,
                const char *host,
                int port,
                const char *filename);

void get_filename(const char *path,
                  const char *host,
                  int port,
                  const char *filename,
                  char *name,
                  int create_dirs);

/* Prototype for the function implemented in 'http.c'.
 *
 * NOTE(review): the parameters are unnamed here, so their meaning has to be
 * looked up in http.c; the function hands back a stdio stream.
 */
FILE *Download(char *, int, char *, char *, int, int);

/* global variables */

/* Run-time options for the program, grouped by purpose. */
struct OptStruct
{
    /* --- which URLs get followed --- */
    int bAsk;                               /* ask before jumping to a new server? */
    int bFollowNone;                        /* by default, follow nothing at all */
    int bImageOverride;                     /* fetch every inline image? */
    int nAlwaysFollow;                      /* entries in use in pszAlwaysFollow[] */
    const char *pszAlwaysFollow[MAXFOLLOW]; /* substrings that are always followed */
    int nNeverFollow;                       /* entries in use in pszNeverFollow[] */
    const char *pszNeverFollow[MAXFOLLOW];  /* substrings that are never followed */
    FILE *fURLLog;                          /* file that off-site URLs are logged to */

    /* --- rewriting of fetched pages --- */
    int bRewrite;                           /* rewrite each page with local URLs? */
    char cRewriteMode;                      /* 'a', 'l', or 'f' - see usage */
    int bRename;                            /* rename files to (a) remove
                                               metacharacters and (b) end in the
                                               correct extension */
    int bQueryAddPid;                       /* when renaming queries, store the
                                               pid in the name? */

    /* --- recursion limits --- */
    int nRemote;                            /* remote hop count, -1 = no limit */
    int nLocal;                             /* local limit, same convention */

    /* --- general --- */
    const char *pszOutputDir;               /* output dir, relative to pszServerRoot */
    int bVerbose;                           /* 1 when verbose mode is required */

    /* --- HTTP --- */
    char *userAgent;                        /* e.g. 'Mozilla/4.05 [en] (WinNT; I ;Nav)' */
    int timeout;                            /* timeout in seconds */
    int bNoDataTO;                          /* no-data timeout, or overall timeout? */
};

/* Default User-Agent string.
 *
 * Other strings that may be substituted:
 *   "Mozilla/4.05 [en] (WinNT; I ; Nav)"
 *   "Mozilla/4.0 (compatible; MSIE 4.01; Windows 95; DIL0001011)"
 */
#define DEF_USER_AGENT \
    "Mozilla/4.05 [en] (X11; I; Linux 2.0.27 i586; Nav)"

/* Program-wide globals.
 *
 * Everything here is a declaration only.  Without 'extern', each .c file
 * including this header would carry its own tentative definition of the two
 * string pointers, which fails to link when the compiler does not merge
 * common symbols (-fno-common, the default since GCC 10).
 *
 * NOTE(review): each object must be defined in exactly one .c file; confirm
 * that a definition of pszServerRoot and pszURLPrefix exists.
 */
extern const char *pszServerRoot;
extern const char *pszURLPrefix;

extern struct OptStruct options;
extern char lastcontenttype[];

/* definitions for getxref module */

/* Capacity of every per-reference array in an xreflist.  The arrays are
 * indexed in parallel, so they must all share this one length.
 */
#define XREFLIST_MAXREFS 200

/* Fixed-capacity list of references, filled in through getxref().
 * NOTE(review): the per-field meanings below are inferred from the names;
 * confirm against the getxref module.
 */
typedef struct xreflist {
    int nrefs;                          /* number of entries in use */
    char *refs[XREFLIST_MAXREFS];       /* the reference strings */
    int startloc[XREFLIST_MAXREFS];     /* presumably where each reference starts */
    int endloc[XREFLIST_MAXREFS];       /* presumably where each reference ends */
    int alwaysget[XREFLIST_MAXREFS];    /* per-reference "always get" flag */
} xreflist;

/* True (1) when 'a' is a space, tab, newline or carriage return, else 0.
 * Vertical tab and form feed are NOT counted.  The argument can be evaluated
 * up to four times, so do not pass an expression with side effects.
 */
#define iswhitespace(a)   \
    (  (a) == ' '         \
    || (a) == '\t'        \
    || (a) == '\n'        \
    || (a) == '\r' )

/* Entry point of the getxref module: takes an open stream and an xreflist.
 * NOTE(review): presumably scans 'fp' and records what it finds in '*xrefs';
 * the meaning of the return value is not visible from this header.
 */
int getxref(FILE *fp,
            xreflist *xrefs);

/* Prototypes for the functions implemented in url.c.
 *
 * NOTE(review): 'newhost', 'newport' and 'newfile' are non-const and so look
 * like outputs, but that and the return-value conventions must be confirmed
 * in url.c.  SplitURL's parameters are unnamed in this header.
 */

int relative_url(char *newurl,
                 const char *oldhost,
                 int oldport,
                 const char *oldfile,
                 char *newhost,
                 int *newport,
                 char *newfile,
                 int alwaysget);

int SplitURL(char *, char *, char *, int *);

/* Prototype for the function implemented in rewrite.c.
 * NOTE(review): given the 'bRewrite' option ("rewrite each page with local
 * urls"), this presumably rewrites the page identified by path/host/port/
 * filename using the references in '*xr'; confirm in rewrite.c.
 */

void rewrite(char *path,
             const char *host,
             int port,
             char *filename,
             xreflist *xr);

/* Prototypes for the functions implemented in rename.c.
 *
 * rename_init is declared with an explicit (void): before C23 an empty
 * parameter list is an old-style declaration that lets callers pass any
 * arguments unchecked.
 *
 * NOTE(review): the behaviour of these functions is not visible from this
 * header.  rename_readconfig takes a stream plus a caller-supplied buffer of
 * 'buflen' bytes and a line counter; rename_object returns a char pointer
 * whose ownership must be confirmed in rename.c.
 */
void rename_init(void);

void rename_readconfig(FILE *fp,
                       const char *filename,
                       int *lineno,
                       char *buf,
                       int buflen);

char *rename_object(const char *pszHost,
                    int port,
                    char *pszObjectname,
                    const char *contenttype);

