#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "keys.h"

/*
 * Initialize `keys` as an empty buffer with room for 100 keys.
 *
 * Both per-slot arrays are zero-filled: push_key() treats a slot whose
 * recorded length is 0 as "no string allocated yet" and frees/reuses slot
 * pointers, so the pointer array must not contain indeterminate values.
 * Exits the process with status 1 if memory cannot be allocated, matching
 * the allocation-failure handling in push_key().
 */
void init_key_buf (KeyBuffer* keys)
{
    keys->num = 0; keys->max = 100;
    keys->str = calloc(keys->max, sizeof(unsigned char*));
    if (keys->str == NULL) {
	fprintf(stderr, "cannot allocate memory keys->str\n");
	exit(1);
    }
    keys->len = calloc(keys->max, sizeof(int));
    if (keys->len == NULL) {
	fprintf(stderr, "cannot allocate memory keys->len\n");
	exit(1);
    }
}

/*
 * Mark the buffer as holding no keys.
 *
 * Only the key count is reset; the per-slot string pointers and recorded
 * lengths are left untouched.
 */
void clear_key (KeyBuffer* keys)
{
    keys->num = 0;
}

/*
 * Append the bytes in [start, end) to `keys` as a NUL-terminated string.
 *
 * Does nothing when the range is empty (start >= end).  When all slots are
 * in use the slot arrays grow by 100 entries and the new entries are
 * zeroed.  keys->len[i] records the longest key slot i has been allocated
 * for (0 = never allocated); a slot is reallocated only when the new key
 * does not fit.  Exits the process with status 1 on allocation failure.
 */
void push_key (KeyBuffer* keys, unsigned char* start, unsigned char* end)
{
    enum { KEY_GROW_STEP = 100 };
    int len;
    void *grown;

    if (start >= end) return;
    if (keys->num >= keys->max) { /* overflow buffer size */
	keys->max += KEY_GROW_STEP;
	grown = realloc(keys->str, sizeof(char*)*keys->max);
	if (grown == NULL) {
	    fprintf(stderr, "cannot allocate memory kyes->str\n");
	    exit(1);
	}
	keys->str = grown;
	grown = realloc(keys->len, sizeof(int)*keys->max);
	if (grown == NULL) {
	    fprintf(stderr, "cannot allocate memory kyes->len\n");
	    exit(1);
	}
	keys->len = grown;
	/* zero only the freshly added tail of each array */
	memset((keys->len + keys->max - KEY_GROW_STEP), 0,
	       KEY_GROW_STEP*sizeof(int));
	memset((keys->str + keys->max - KEY_GROW_STEP), 0,
	       KEY_GROW_STEP*sizeof(char*));
    }
    len = (int)(end - start);
    if (len > keys->len[keys->num]) {
	/* A recorded length of 0 means the slot was never allocated, so
	   its pointer must not be handed to free(). */
	if (keys->len[keys->num] > 0) free(keys->str[keys->num]);
	keys->str[keys->num] = malloc((size_t)len + 1);
	if (keys->str[keys->num] == NULL) {
	    fprintf(stderr, "cannot allocate memory kyes->str\n");
	    exit(1);
	}
	keys->len[keys->num] = len;
    }
    /* strncpy stops reading at a NUL inside the range and does not
       terminate when it copies exactly len bytes, so terminate here. */
    strncpy((char*)keys->str[keys->num], (char*)start, (size_t)len);
    keys->str[keys->num][len] = '\0';
    keys->num++;
}

/*
 * Classify the character at `s` and return its type flags.
 *
 * For characters that is_EUC() accepts, the tests are applied in this
 * order and the first hit wins: is_kanaH -> HIRA, is_kanaK -> KATA,
 * is_Br/is_Space/is_Line -> EUC|SEP, is_Tyou -> TYOU, is_TEN -> EUC|SEP,
 * otherwise KANJI.  For all other characters: '\n' -> NL,
 * is_kigou -> ASCII|SEP, otherwise ASCII.
 */
int chartype (unsigned char* s)
{
    if (is_EUC(s)) {
	/* EUC code */
	if (is_kanaH(s)) return HIRA;
	if (is_kanaK(s)) return KATA;
	if (is_Br(s) || is_Space(s) || is_Line(s)) return (EUC|SEP);
	if (is_Tyou(s)) return TYOU;
	if (is_TEN(s)) return (EUC|SEP);
	return KANJI;
    }

    /* ASCII code */
    if (*s == '\n') return (NL);
    return (is_kigou(s)) ? (ASCII|SEP) : ASCII;
}


/*
 * Split `str` into keys wherever the character class reported by
 * chartype() changes, storing them in `keys` (which is cleared first).
 *
 *  - Separator characters (SEP flag) end the pending token and are
 *    skipped: 2 bytes when the EUC flag is set, otherwise 1.
 *  - A newline that follows an EUC character while a token is pending
 *    stops the scan; that pending token is the "tail" described below.
 *  - TYOU characters never start a new token and do not update the
 *    remembered class, so they stay attached to the current run.
 *
 * Returns the byte length of the tail token pushed after the scan stops,
 * or 0 if there is none.
 *
 * The scan is bounded by the string length, so a two-byte step taken on a
 * truncated trailing character can never cause a read past the terminator.
 */
int parse_by_chartype (KeyBuffer* keys, unsigned char* str)
{
    unsigned char *start, *cr, *end;
    int type, prev;

    start = cr = str;
    end = str + strlen((char*)str);
    prev = (SEP|ASCII);

    clear_key(keys);
    while (cr < end) {
	type = chartype(cr);
	if (type & SEP) {
	    if (start < cr) {
		if (type == NL && (prev & EUC)) {break;}
		push_key(keys, start, cr);
	    }
	    prev = type;
	    cr += ((type & EUC)? 2:1);
	    start = cr;
	} else if (type & ASCII) {
	    if (!(prev & ASCII)) {
		push_key(keys, start, cr); /* no-op when start == cr */
		start = cr;
	    }
	    prev = type;
	    cr++;
	} else {
	    if ( prev != type && type != TYOU) {
		push_key(keys, start, cr); /* no-op when start == cr */
		start = cr;
	    }
	    if (type != TYOU) { prev = type; }
	    cr += 2;
	}
    }
    /* A two-byte step on a lone trailing byte lands one past the
       terminator; pull the cursor back onto it. */
    if (cr > end) cr = end;
    if (start < cr) {
	push_key(keys, start, cr);
	return (int)(cr - start);
    }
    return 0;
}

/*
 * Split `str` into keys: every window of N consecutive two-byte (is_EUC)
 * characters becomes a key, and each run of other characters delimited by
 * is_space()/is_EUC() becomes one key.  `keys` is cleared first.
 *
 * When a run of two-byte characters is followed by a non-EUC character,
 * the remaining shorter suffixes of the run are pushed as well.  Scanning
 * stops at '\n', at the terminator, or at a truncated two-byte character
 * (a lead byte immediately followed by the terminator).
 *
 * Returns the byte length of the pending two-byte run pushed after the
 * scan stops, or 0 if none is pending.
 */
int parse_by_ngram (KeyBuffer* keys, unsigned char* str, int N)
{
    int len;                     /* two-byte chars in the current window */
    unsigned char *cr, *start;

    cr = str;

    len = 0;
    clear_key(keys);

    while (*cr != '\0') {
	if (is_EUC(cr)) {  /* EUC code */
	    /* Stepping 2 bytes here would jump over the terminator. */
	    if (cr[1] == '\0') break;
	    cr += 2;
	    if (++len >= N) {
		push_key(keys, cr-(len*2), cr);
		len--; /* slide the window forward by one character */
	    }
	} else { /* ASCII code */
	    if (*cr == '\n') break;
	    /* flush the shorter suffixes of the preceding EUC run */
	    while (len > 0) {
		push_key(keys, cr-(len*2), cr);
		len--;
	    }
	    start = cr;
	    while (!is_EUC(cr) && !is_space(cr) && *cr != '\0') cr++;
	    if (start != cr) {
		push_key(keys, start, cr);
	    }
	    if (is_space(cr)) cr++;
	    len = 0;
	}
    }
    if (len > 0) {
	push_key(keys, cr-len*2, cr);
	return (len*2);
    }

    return 0;
}

/***********************************************************************/
/*
 * Split `str` into keys using dictionary lookups through the suffix array
 * `sa`, up to the first '\n' or the terminator.  `keys` is cleared first.
 *
 * At each position the last entry returned by sa_common_prefix_search()
 * is pushed as a key when it is longer than `lastp`, the number of bytes
 * of the previously pushed entry still lying ahead of the cursor.
 * Text with no usable lookup is gathered into "undefined" runs, tracked by
 * undef_mode:
 *     3 = is_Alpha characters,
 *     2 = is_kanaK characters (is_Tyou continues such a run),
 *     1 = characters for which the search returned no result,
 *     0 = not inside an undefined run.
 * A run is pushed as one key when its mode changes, at is_space(), or at
 * the end of the input.
 *
 * The search result is released on every path out of a loop iteration;
 * this assumes, as the release at the bottom of the loop does, that the
 * caller owns the array returned by sa_common_prefix_search().
 *
 * Always returns 1.
 */
int parse_by_mozdic (KeyBuffer* keys, unsigned char* str, SUFARY* sa)
{
    int lastp, keylen, undef_mode;
    unsigned long *rslt;
    unsigned char *key;
    unsigned char *undefp = NULL;   /* start of the current undefined run */
    unsigned char lastc;

    lastp = 0; clear_key(keys); undef_mode = 0;
    while (*str != '\0' && *str != '\n') {
	rslt = NULL; /* set only when a search is performed this round */
	if (is_Alpha(str)) {
	    if (undef_mode && undef_mode != 3) {
		push_key(keys, undefp, str); undefp = NULL;
	    }
	    undef_mode = 3;
	} else if (is_kanaK(str) || (undef_mode == 2 && is_Tyou(str))) {
	    /* is_kanaK run; is_Tyou only extends a run already in mode 2 */
	    if (undef_mode && undef_mode != 2) {
		push_key(keys, undefp, str); undefp = NULL;
	    }
	    undef_mode = 2;
	} else {
	    sa_reset(sa);
	    rslt = sa_common_prefix_search(sa, str, '\0');
	    if (rslt == NULL || rslt[0] < 1) {
		if (undef_mode && undef_mode != 1) {
		    push_key(keys, undefp, str); undefp = NULL;
		}
		undef_mode = 1;
	    } else
		undef_mode = 0;
	}
	if (undef_mode) { /* inside an undefined run */
	    free(rslt); /* empty result is not needed; free(NULL) is a no-op */
	    if (undefp == NULL) undefp = str;
	    if (is_EUC(str)) {
		lastp -= 2; str += 2;
	    } else {
		if (is_space(str)) {
		    push_key(keys, undefp, str);
		    undefp = NULL; undef_mode = 0;
		}
		lastp -= 1; str += 1;
	    }
	    continue;
	} else if (undefp != NULL) {
	    if (lastp < 0) push_key(keys, undefp, str);
	    undefp = NULL;
	}

	/* the last element of the result array is the entry used */
	key = sa_txtidx2txtptr(sa, rslt[rslt[0]]);
	keylen = (int)strlen((char*)key);
	if (keylen > lastp) {
	    if (keylen > 0) push_key(keys, key, (key+keylen));
	    lastc = *(str+keylen);
	    if (lastc == '\0' || lastc == '\n') {
		free(rslt);
		break;
	    }
	    if (is_EUC(str)) {
		lastp = keylen - 2;
		str += 2;
	    } else {
		lastp = keylen - 1;
		str++;
	    }
	} else {
	    if (is_EUC(str)) {
		lastp -= 2; str += 2;
	    } else {
		lastp -= 1; str += 1;
	    }
	}
	free(rslt);
    }
    if (undefp != NULL) push_key(keys, undefp, str);
    return 1;
}
