/* PSPP - computes sample statistics.
   Copyright (C) 1997, 1998 Free Software Foundation, Inc.
   Written by Ben Pfaff <blp@gnu.org>.

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
   02111-1307, USA. */

/* Include guard.  `#if !var_h' works because an identifier that is
   not defined as a macro evaluates to 0 in a preprocessor
   conditional.  The matching #endif is at the end of the file. */
#if !var_h
#define var_h 1

#include "format.h"
#include "common.h"

/* Values. */

/* Describes one value, which is either a floating-point number or a
   short string.  Being a union, all members overlay the same
   storage. */
typedef union value
  {
    /* A numeric value. */
    double f;

    /* A short-string value, MAX_SHORT_STRING bytes wide. */
    unsigned char s[MAX_SHORT_STRING];

    /* This member is used by data-in.c to return a string result,
       since it may need to return a long string.  As currently
       implemented, it's a pointer to a static internal buffer in
       data-in.c.

       Also used by evaluate_expression() to return a string result.
       As currently implemented, it's a pointer to a dynamic buffer in
       the appropriate expression.

       Also used by the AGGREGATE procedure in handling string
       values. */
    unsigned char *c;

    /* Sometimes we insert values in a hash table.  This member views
       the storage as SIZEOF_DOUBLE / SIZEOF_LONG unsigned longs.
       NOTE(review): assuming both macros are integer constants, the
       quotient is 0 whenever SIZEOF_LONG exceeds SIZEOF_DOUBLE, which
       would make this a zero-length array -- confirm no supported
       target has such sizes. */
    unsigned long hash[SIZEOF_DOUBLE / SIZEOF_LONG];
  }
value;

/* Describes one value label: a value paired with its label text.
   NOTE(review): the code that maintains ref_count and frees `s' is
   not visible in this header; see free_value_label(). */
typedef struct
  {
    value v;			/* The value being labeled. */
    char *s;			/* Pointer to malloc()'d label. */
    int ref_count;		/* Reference count. */
  }
value_label;

/* Frequency tables. */

/* Frequency table entry: one distinct value and its count. */
typedef struct
  {
    value v;			/* The value. */
    double c;			/* The number of occurrences of the value.
				   Stored as a double; presumably a sum
				   of case weights -- TODO confirm. */
  }
freq;

/* Frequency table modes, as stored in the `mode' member of
   freq_tab. */
enum
  {
    FRQM_GENERAL = 0,		/* General mode. */
    FRQM_INTEGER = 1		/* Integer mode. */
  };

/* Entire frequency table.  Members are grouped below by the mode
   (see `mode') that they apply to. */
typedef struct
  {
    int mode;			/* FRQM_GENERAL or FRQM_INTEGER. */

    /* General mode. */
    struct avl_tree *tree;	/* Undifferentiated data. */

    /* Integer mode. */
    double *vector;		/* Frequencies proper. */
    int min, max;		/* The boundaries of the table. */
    double out_of_range;	/* Sum of weights of out-of-range values. */
    double sysmis;		/* Sum of weights of SYSMIS values. */

    /* All modes. */
    freq *valid;		/* Valid freqs. */
    int n_valid;		/* Number of total freqs.  NOTE(review):
				   by analogy with n_missing this is
				   presumably the number of entries in
				   valid[] -- confirm. */

    freq *missing;		/* Missing freqs. */
    int n_missing;		/* Number of missing freqs. */

    /* Statistics. */
    double total_cases;		/* Sum of weights of all cases. */
    double valid_cases;		/* Sum of weights of valid cases. */
  }
freq_tab;

/* A complete set of 3 frequency tables.  All three are held by
   value, so `sel' is a separate freq_tab object rather than a pointer
   to one of the other two. */
typedef struct
  {
    freq_tab miss;		/* Includes user-missing values. */
    freq_tab no_miss;		/* Excludes user-missing values. */
    freq_tab sel;		/* Identical to either miss or no_miss. */
  }
freq_tab_set;

/* Procedures' private per-variable data. */

/* Structure name suffixes for private data:
   _proc: for a procedure (e.g., LIST -> list_proc).
   _trns: for a transformation (e.g., COMPUTE -> compute_trns).
   _pgm: for an input program (e.g., DATA LIST -> data_list_pgm). */

/* Forward declaration, so that the private-data structures below can
   hold `variable *' members before struct variable is defined. */
typedef struct variable variable;

/* CROSSTABS private per-variable data. */
typedef struct
  {
    /* Integer mode only. */
    int min;			/* Minimum value. */
    int max;			/* Maximum value + 1, i.e. an exclusive
				   upper bound. */
    int count;			/* max - min. */
  }
crosstab_proc;

/* FREQUENCIES private data. */
/* Statistics kept by FREQUENCIES.  frq_n_stats counts them and sizes
   the stat[] array in frequencies_proc, so it must remain the final
   enumerator. */
enum
  {
    frq_mean = 0,
    frq_semean,
    frq_median,
    frq_mode,
    frq_stddev,
    frq_variance,
    frq_kurt,
    frq_sekurt,
    frq_skew,
    frq_seskew,
    frq_range,
    frq_min,
    frq_max,
    frq_sum,

    frq_n_stats			/* Number of statistics. */
  };

/* Per-variable data for the FREQUENCIES procedure. */
typedef struct
  {
    /* General mode. */
    freq_tab tab;		/* Frequencies table to use. */

    /* Percentiles. */
    int n_groups;		/* Number of groups. */
    double *groups;		/* Groups. */

    /* Statistics.  The array has one element per frq_* constant and
       is presumably indexed by those constants. */
    double stat[frq_n_stats];
  }
frequencies_proc;

/* LIST private per-variable data.  `newline' and `vert' are ints
   used as boolean flags. */
typedef struct
  {
    int newline;		/* Whether a new line begins here. */
    int width;			/* Field width. */
    int vert;			/* Whether to print the varname vertically. */
  }
list_proc;

/* DESCRIPTIVES private data.  Note that the DESCRIPTIVES procedure also
   has a transformation, descriptives_trns. */
enum
  {
    /* These are used as bit indexes, so there must be 32 or fewer
       of them.  Be very careful in adjusting these; see the
       descriptives_proc structure below and the table in
       descriptives.q. */
    dsc_mean = 0,
    dsc_semean,
    dsc_stddev,
    dsc_variance,
    dsc_kurt,
    dsc_sekurt,
    dsc_skew,
    dsc_seskew,
    dsc_range,
    dsc_min,
    dsc_max,
    dsc_sum,

    dsc_n_stats			/* Number of statistics; keep last. */
  };

/* Per-variable data for the DESCRIPTIVES procedure. */
typedef struct
  {
    /* Miscellaneous. */
    int dup;			/* Finds duplicates in list of
				   variables. */
    char zname[10];		/* Name for z-score variable.
				   NOTE(review): one byte larger than
				   the name[9] buffers used elsewhere in
				   this header; the reason is not
				   visible here. */

    /* Counts. */
    double valid, miss;		/* Valid, missing--general. */

    /* Mean, moments about the mean. */
    double X_bar, M2, M3, M4;
    double min, max;

    /* Statistics.  One element per dsc_* constant. */
    double stats[dsc_n_stats];	/* Everything glommed together. */
  }
descriptives_proc;

/* GET private per-variable data.  The members have the same names
   as fv and nv in struct variable. */
typedef struct
  {
    int fv;			/* First value. */
    int nv;			/* Number of values. */
  }
get_proc;

/* Sort directions, as stored in the `order' member of
   sort_cases_proc. */
enum
  {
    SRT_ASCEND = 0,		/* A, B, C, ..., X, Y, Z. */
    SRT_DESCEND = 1		/* Z, Y, X, ..., C, B, A. */
  };

/* SORT CASES private per-variable data: the direction in which this
   variable is to be sorted. */
typedef struct
  {
    int order;			/* SRT_ASCEND or SRT_DESCEND. */
  }
sort_cases_proc;

/* MODIFY VARS private per-variable data. */
typedef struct
  {
    char new_name[9];		/* Variable's new name.  Same size as
				   the name[] member of struct
				   variable. */
    int drop_this_var;		/* 0=keep this var, 1=drop this var. */
    variable *next;		/* Next in linked list. */
  }
modify_vars_proc;

/* MEANS private per-variable data: the range used in integer
   mode. */
typedef struct
  {
    double min;			/* Lower end of the range. */
    double max;			/* Upper end of the range. */
  }
means_proc;

/* Kinds of variables for the MATRIX DATA procedure.  The numeric
   order matters because these values are used as sort keys, so the
   values are spelled out explicitly. */
enum
  {
    MXD_SPLIT = 0,		/* SPLIT FILE variables. */
    MXD_ROWTYPE = 1,		/* ROWTYPE_. */
    MXD_FACTOR = 2,		/* Factor variables. */
    MXD_VARNAME = 3,		/* VARNAME_. */
    MXD_CONTINUOUS = 4,		/* Continuous variables. */

    MXD_COUNT			/* Number of kinds; keep last. */
  };

/* MATRIX DATA private per-variable data. */
typedef struct
  {
    int vartype;		/* Variable type.  Presumably one of the
				   MXD_* constants -- TODO confirm. */
    int subtype;		/* Subtype. */
  }
matrix_data_proc;

/* MATCH FILES private per-variable data. */
typedef struct
  {
    variable *master;		/* Corresponding master file variable. */
  }
match_files_proc;


/* Script variables. */

/* Variable types, as stored in the `type' member of struct
   variable.  The string type is called ALPHA because the name STRING
   is pre-empted by lexer.h. */
enum
  {
    NUMERIC = 0,		/* A numeric variable. */
    ALPHA = 1			/* A string variable. */
  };

/* Types of missing values.  The numeric order is significant; see
   mis-val.c:parse_numeric(), sfm-read.c:sfm_read_dictionary(),
   sfm-write.c:sfm_write_dictionary(),
   sysfile-info.c:cmd_sysfile_info(), mis-val.c:copy_missing_values(),
   pfm-read.c:read_variables(), pfm-write.c:write_variables(),
   apply-dict.c:cmd_apply_dictionary(), and more (?).

   NOTE(review): the comment on MISSING_HIGH used to read `(a,+inf]'.
   It is written `(a,+inf)' here to agree with MISSING_HIGH_1;
   whether `a' itself counts as missing is not visible in this
   header, so check mis-val.c. */
enum
  {
    MISSING_NONE = 0,		/* No user-missing values. */
    MISSING_1 = 1,		/* One user-missing value. */
    MISSING_2 = 2,		/* Two user-missing values. */
    MISSING_3 = 3,		/* Three user-missing values. */
    MISSING_RANGE = 4,		/* [a,b]. */
    MISSING_LOW = 5,		/* (-inf,a]. */
    MISSING_HIGH = 6,		/* (a,+inf). */
    MISSING_RANGE_1 = 7,	/* [a,b], c. */
    MISSING_LOW_1 = 8,		/* (-inf,a], b. */
    MISSING_HIGH_1 = 9,		/* (a,+inf), b. */

    MISSING_COUNT		/* Number of types; keep last. */
  };

/* A variable's dictionary entry.  Note: don't reorder name[] from the
   first element; a pointer to `variable' should be a pointer to
   member `name'. */
struct variable
  {
    /* Required by parse_variables() to be in this order.  */
    char name[9];		/* As a string. */
    int index;			/* Index into its dictionary's var[]. */
    int type;			/* NUMERIC or ALPHA. */
    int foo;			/* Used for temporary storage. */

    /* Also important but parse_variables() doesn't need it.  Still,
       check before reordering. */
    int width;			/* Size of string variables in chars. */
    int fv, nv;			/* Index into `value's, number of values. */
    int left;			/* 0=do not LEAVE, 1=LEAVE. */

    /* Missing values. */
    int miss_type;		/* One of the MISSING_* constants. */
    value missing[3];		/* User-missing value.  Presumably how
				   many elements are meaningful depends
				   on miss_type -- TODO confirm. */

    /* Display formats. */
    fmt_spec print;		/* Default format for PRINT. */
    fmt_spec write;		/* Default format for WRITE. */

    /* Labels. */
    struct avl_tree *val_lab;	/* Avltree of value_label structures. */
    char *label;		/* Variable label. */

    /* Per-procedure info.  `get' is a separate member and so does
       not share storage with the union; the members of `p' all
       overlay one another. */
    get_proc get;
    union
      {
	crosstab_proc crs;
	descriptives_proc dsc;
	frequencies_proc frq;
	list_proc lst;
	means_proc mns;
	sort_cases_proc srt;
	modify_vars_proc mfv;
	matrix_data_proc mxd;
	match_files_proc mtf;
      }
    p;
  };

/* Cases. */

/* A single case.  (This doesn't need to be a struct anymore, but it
   remains so for hysterical raisins.)

   NOTE(review): data[] is declared with one element, but a case
   presumably holds one `value' per dictionary value (see `nval' in
   struct dictionary), with the storage over-allocated by the
   caller -- confirm before relying on sizeof (ccase). */
typedef struct ccase
  {
    value data[1];
  }
ccase;

/* Dictionary. */ 

/* Complete dictionary state.  filter_var[] must remain the last
   member; see the note at the end of the structure. */
typedef struct dictionary
  {
    variable **var;		/* Variable descriptions. */
    struct avl_tree *var_by_name;	/* Variables arranged by name. */
    int nvar;			/* Number of variables. */

    int N;			/* Current case limit (N command). */
    int nval;			/* Number of value structures per case. */

    int n_splits;		/* Number of SPLIT FILE variables. */
    variable **splits;		/* List of SPLIT FILE vars. */
    
    char *label;		/* File label. */

    int n_documents;		/* Number of lines of documents. */
    char *documents;		/* Documents; 80*n_documents bytes in size. */

    int weight_index;		/* `value' index of $WEIGHT, or -1 if none.
				   Call update_weighting() before using! */
    char weight_var[9];		/* Name of WEIGHT variable. */

    char filter_var[9];		/* Name of FILTER variable. */
    /* Do not add another member after filter_var without first
       reading temporary.c:restore_dictionary(), which depends on
       filter_var being last. */
  }
dictionary;

/* This is the active file dictionary.  Only declared here; the
   object itself is defined in another translation unit. */
extern dictionary default_dict;

/* Transformation state. */

/* Default file handle for DATA LIST, REREAD, REPEATING DATA
   commands.  struct file_handle is not defined by this header, so
   it is used here only through a pointer. */
extern struct file_handle *default_handle;

/* PROCESS IF expression.  struct expression is likewise used here
   only through a pointer. */
extern struct expression *process_if_expr;

/* TEMPORARY support. */

/* 1=TEMPORARY has been executed at some point. */
extern int temporary;

/* If temporary!=0, the saved dictionary. */
extern dictionary *temp_dict;

/* If temporary!=0, index into t_trns[] (declared far below) that
   gives the point at which data should be written out.  -1 means that
   the data shouldn't be changed since all transformations are
   temporary. */
extern int temp_trns;

/* If FILTER is active, whether it was executed before or after
   TEMPORARY.  NOTE(review): judging by the name, nonzero presumably
   means "before" -- confirm at the point of use. */
extern int FILTER_before_TEMPORARY;

/* Presumably undoes the effect of TEMPORARY; see the definition for
   exactly which of the variables above it resets. */
void cancel_temporary (void);

/* Functions. */

/* Variable-name predicates.  NOTE(review): judging by the names,
   these presumably test whether the string names a variable in the
   default dictionary or in the given dictionary respectively; the
   meaning of the int result should be confirmed against the
   definitions. */
int is_varname (const char *);
int is_dict_varname (const dictionary *, const char *);

/* Flags for passing to fill_all_vars().  Each nonzero flag occupies
   its own bit. */
enum
  {
    FV_NONE = 0,			/* No flags. */
    FV_NO_SYSTEM = 1 << 0,		/* Don't include system variables. */
    FV_NO_SCRATCH = 1 << 1		/* Don't include scratch variables. */
  };

/* `flags' is built from the FV_* constants above.  NOTE(review): the
   two pointer arguments are presumably an output array of variables
   and its length, as with parse_variables() -- confirm against the
   definition. */
void fill_all_vars (variable ***, int *, int flags);

/* Value labels.  NOTE(review): val_lab_cmp() and free_val_lab() take
   generic `void *' arguments, so they are presumably callbacks for
   the AVL tree stored in the val_lab member of struct variable --
   confirm. */
int val_lab_cmp (const void *, const void *, void *);
char *get_val_lab (const variable *, value, int);
void free_val_lab (void *, void *);
void free_value_label (value_label *);
struct avl_tree *copy_value_labels (struct avl_tree *);

void dump_split_vars (const ccase *);

/* Missing-value tests.  The first two take a bare number or string;
   the rest take a pointer to a `value'.  All take the variable whose
   missing-value settings apply. */
int is_num_user_missing (double, const variable *);
int is_str_user_missing (const unsigned char[], const variable *);
int is_missing (const value *, const variable *);
int is_system_missing (const value *, const variable *);
int is_user_missing (const value *, const variable *);
void copy_missing_values (variable *dest, const variable *src);

/* NOTE(review): same three-argument shape as val_lab_cmp(), so
   presumably also an AVL tree comparison callback -- confirm. */
int cmp_variable (const void *, const void *, void *);

/* force_create_variable() and force_dup_variable() are real
   functions when GLOBAL_DEBUGGING is nonzero.  Otherwise they are
   macros that expand directly to create_variable() and
   dup_variable(), which are declared below. */
#if GLOBAL_DEBUGGING
variable *force_create_variable (dictionary *, const char *name,
				 int type, int width);
variable *force_dup_variable (dictionary *, const variable *src,
			      const char *name);
#else
#define force_create_variable(A, B, C, D)	\
	create_variable (A, B, C, D)
#define force_dup_variable(A, B, C)		\
	dup_variable (A, B, C)
#endif

/* Creating, finding, modifying, and destroying variables.  Functions
   that take a `dictionary *' operate on that dictionary.
   NOTE(review): those that do not (find_variable, replace_variable,
   discard_variables, clear_default_dict) presumably operate on
   default_dict -- confirm.  `type' is NUMERIC or ALPHA as in struct
   variable. */
variable *create_variable (dictionary *, const char *name,
			   int type, int width);
void delete_variable (dictionary *, variable *v);
variable *find_variable (const char *name);
variable *find_dict_variable (const dictionary *, const char *name);
void init_variable (dictionary *, variable *, const char *name, int type,
		    int width);
void replace_variable (variable *, const char *name, int type, int width);
void clear_variable (dictionary *, variable *);
void rename_variable (dictionary *, variable *v, const char *new_name);
void discard_variables (void);
void clear_default_dict (void);
void copy_variable (variable *dest, const variable *src);
variable *dup_variable (dictionary *dict, const variable *src,
			const char *name);

/* Weighting.  The weight_index member of struct dictionary must not
   be used until update_weighting() has been called. */
variable *update_weighting (dictionary *);
void stop_weighting (dictionary *);

/* Saving, restoring, creating, and freeing whole dictionaries.
   NOTE(review): save_dictionary() and new_dictionary() take no
   dictionary argument, so they presumably work from default_dict;
   the meaning of `copy' is not visible here -- confirm both against
   the definitions. */
dictionary *save_dictionary (void);
void restore_dictionary (dictionary *);
void free_dictionary (dictionary *);
dictionary *new_dictionary (int copy);

/* Transformations. */

/* Header for all transformations.  The type has two names:
   trns_header and any_trns.  NOTE(review): concrete transformation
   structures presumably embed this as their first member so that
   they can be stored in t_trns[] -- confirm. */
typedef struct trns_header
  {
    /* Index into t_trns[]. */
    int index;

    /* Transformation proc.  It receives this header and the case to
       operate on; the meaning of the int result is not documented
       in this header. */
    int (*proc) (struct trns_header *, ccase *);

    /* Garbage collector proc.  It receives this header. */
    void (*free) (struct trns_header *);
  }
trns_header, any_trns;

/* Array of pointers to transformations. */
extern any_trns **t_trns;

/* Number of transformations, maximum number in array currently. */
extern int n_trns, m_trns;

/* Index of first transformation that is really a transformation.  Any
   transformations before this belong to INPUT PROGRAM. */
extern int f_trns;

/* NOTE(review): presumably these append to, and empty, t_trns[]
   respectively; confirm ownership of `trns' against the
   definitions. */
void add_transformation (any_trns *trns);
void cancel_transformations (void);

/* Variable parsers. */

/* Option bits for the variable parsers.  Each nonzero option
   occupies its own bit.  Only parse_variables() supports options
   other than PV_APPEND and PV_SINGLE. */
enum
  {
    PV_NONE = 0,			/* No options. */
    PV_SINGLE = 1 << 0,			/* Restrict to a single varname or TO use. */
    PV_DUPLICATE = 1 << 1,		/* Don't merge duplicates. */
    PV_APPEND = 1 << 2,			/* Append to existing list. */
    PV_NO_DUPLICATE = 1 << 3,		/* Error on duplicates. */
    PV_NUMERIC = 1 << 4,		/* Vars must be numeric. */
    PV_STRING = 1 << 5,			/* Vars must be string. */
    PV_SAME_TYPE = 1 << 6,		/* All vars must be the same type. */
    PV_NO_SCRATCH = 1 << 7		/* Disallow scratch variables. */
  };

/* Variable parsers.  `pv_opts' is built from the PV_* constants
   above.  NOTE(review): the `v'/`nv' and `names'/`nnames' pairs are
   presumably an output array and its length, and parse_variable()
   presumably uses default_dict since it takes no dictionary --
   confirm against the definitions, along with the meaning of the int
   results. */
variable *parse_variable (void);
variable *parse_dict_variable (dictionary *);
int parse_variables (dictionary *dict, variable ***v, int *nv, int pv_opts);
int parse_DATA_LIST_vars (char ***names, int *nnames, int pv_opts);
int parse_mixed_vars (char ***names, int *nnames, int pv_opts);

#endif /* !var_h */
