/*-
 * parse.c - parse a sentence
 *
 * Copyright (c) 1996, 1997, 2000 Nara Institute of Science and Technology
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Nara Institute of 
 *      Science and Technology.
 * 4. The name Nara Institute of Science and Technology may not be used to
 *    endorse or promote products derived from this software without specific
 *    prior written permission.
 *    
 *
 * THIS SOFTWARE IS PROVIDED BY Nara Institute of Science and Technology 
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 
 * PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE Nara Institute
 * of Science and Technology BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Modified by: A.Kitauchi <akira-k@is.aist-nara.ac.jp>, Oct. 1996
 *
 * $Id: parse.c,v 1.13.4.1 2000/12/18 20:08:31 masayu-a Exp $
 */

#include "chalib.h"
#include "pat.h"
#include "sufary.h"

/* Growth unit of the Cha_mrph buffer, in mrph2_t entries: the buffer is
 * (re)allocated in multiples of MRPH_NUM entries (see malloc_free_mrph)
 * and callers request one more block whenever an index reaches a multiple
 * of MRPH_NUM. */
#define MRPH_NUM	        1024
/* Capacity of the per-position path index array and of the per-state
 * candidate tables used by check_connect(). */
#define PATH1_NUM		256

/* Character-class codes returned by check_code() (currently compiled out).
 * Apart from HANKAKU and ILLEGAL, each value is compared against a 2-byte
 * character code computed as (first byte * 256 + second byte); most of them
 * act as the exclusive upper bound of the class they name. */
#define HANKAKU            	0x80
#define PRIOD            	0xa1a5
#define CHOON            	0xa1bc
#define KIGOU            	0xa3b0
#define SUJI           	        0xa3c0
#define ALPH            	0xa4a0
#define HIRAGANA                0xa5a0
#define KATAKANA                0xa6a0
#define GR                      0xb0a0
#define KANJI                   0xffff
#define ILLEGAL                 1

/* True when c is a space or a horizontal tab. */
#define is_spc(c)    ((c)==' '||(c)=='\t')

/* Morph buffer; allocated and grown by malloc_free_mrph(). */
mrph2_t *Cha_mrph = NULL;
/* Path buffer; allocated and grown by malloc_free_path(). */
path_t *Cha_path = NULL;
/* Number of entries of Cha_path currently in use. */
int Cha_path_num;


/***********************************************************************
 * malloc_chars - small bump allocator for short-lived arrays
 *
 * Memory is carved out of chunks of CHA_MALLOC_SIZE bytes obtained with
 * cha_malloc(); up to 128 chunks can exist at a time.
 *
 *   size   - element size in bytes; values above 1 must be a power of
 *            two (the macros below use 1, 2 and 4) and make the returned
 *            address a multiple of `size` relative to the chunk start
 *   nitems - number of elements; 0 means "release everything"
 *
 * Returns a pointer to nitems * size bytes, or NULL for the release
 * request.  Note that a request for 0 elements is indistinguishable from
 * a release request.  The release keeps the first chunk for reuse and
 * frees all the others, so every pointer handed out earlier becomes
 * invalid.
 *
 * A request that cannot be satisfied (more than one chunk's worth of
 * bytes, a negative count, or the chunk table being full) is reported
 * through cha_exit(), which is assumed not to return.
 ***********************************************************************/
#define CHA_MALLOC_SIZE (1024 * 64)
#define malloc_char(n)     malloc_chars(1, n)
#define malloc_short(n)    malloc_chars(2, n)
#define malloc_int(n)      malloc_chars(4, n)
#define free_chars()       malloc_chars(0, 0)
static void *malloc_chars(int size, int nitems)
{
    static char *buffer_ptr[128];
    static int buffer_ptr_num = 0;
    static int buffer_idx = CHA_MALLOC_SIZE;

    if (nitems == 0) {
	/* release: keep the first chunk, free the rest, rewind */
	if (buffer_ptr_num > 0) {
	    while (buffer_ptr_num > 1)
		free(buffer_ptr[--buffer_ptr_num]);
	    buffer_idx = 0;
	}
	return NULL;
    }

    /* A single request must fit into one chunk; anything larger would
     * run past the end of the chunk returned below. */
    if (nitems < 0 || nitems > CHA_MALLOC_SIZE / (size > 1 ? size : 1))
	cha_exit(1, "Can't allocate memory");

    if (size > 1) {
	/* round the offset up to the next multiple of size (no padding
	 * is added when it is already aligned) */
	buffer_idx = (buffer_idx + size - 1) & ~(size - 1);
	nitems *= size;		/* from here on nitems is a byte count */
    }

    if (buffer_idx + nitems >= CHA_MALLOC_SIZE) {
	/* current chunk exhausted (or no chunk yet): start a new one */
	if (buffer_ptr_num == 128)
	    cha_exit(1, "Can't allocate memory");
	buffer_ptr[buffer_ptr_num++] = cha_malloc(CHA_MALLOC_SIZE);
	buffer_idx = 0;
    }

    buffer_idx += nitems;
    return buffer_ptr[buffer_ptr_num - 1] + buffer_idx - nitems;
}

/*
 * malloc_free_block - grow or reset a heap buffer managed in fixed blocks
 *
 *   ptr      - current buffer (ignored when *nblockp is 0)
 *   nblockp  - in/out: number of blocks the buffer currently holds
 *   size     - size of one block in bytes
 *   do_free  - non-zero: reset the buffer to exactly one block
 *              zero:     enlarge the buffer by one block
 *
 * Reset (do_free != 0): a buffer of more than one block is freed and a
 * fresh single block is allocated; a buffer of exactly one block is kept
 * as it is; with no buffer at all a single block is allocated.
 *
 * Grow (do_free == 0): the buffer is allocated (first block) or
 * reallocated to one block more, preserving its contents.
 *
 * Returns the (possibly moved) buffer, or NULL when the allocation fails.
 * Callers overwrite their only reference with the return value, so on
 * failure the old buffer is released here and *nblockp is set back to 0;
 * the next reset request then starts again from a clean state instead of
 * leaking the old block or keeping a NULL buffer that claims to hold a
 * block.
 */
static void *malloc_free_block(void *ptr, int *nblockp, int size, int do_free)
{
    void *grown;

    if (do_free) {
	if (*nblockp > 1) {
	    /* the buffer has grown: drop it and start over */
	    free(ptr);
	    ptr = NULL;
	    *nblockp = 0;
	}
	if (*nblockp == 0)
	    ptr = malloc_free_block(NULL, nblockp, size, 0);
	return ptr;
    }

    if (*nblockp == 0) {
	/* first block; count it only once it really exists */
	ptr = malloc((size_t)size);
	if (ptr != NULL)
	    *nblockp = 1;
	return ptr;
    }

    /* one block more; keep the old pointer until realloc has succeeded */
    grown = realloc(ptr, (size_t)size * (size_t)(*nblockp + 1));
    if (grown == NULL) {
	free(ptr);
	*nblockp = 0;
	return NULL;
    }
    ++*nblockp;

    return grown;
}

/*
 * malloc_free_path - resize the global path buffer Cha_path
 *
 *   malloc_path()  enlarges Cha_path by CHA_PATH_NUM entries
 *   free_path()    resets Cha_path to a single block of CHA_PATH_NUM entries
 *
 * Returns non-zero when Cha_path is NULL afterwards (allocation failure),
 * zero otherwise.
 */
#define malloc_path()  malloc_free_path(0)
#define free_path()    malloc_free_path(1)
static int malloc_free_path(int do_free)
{
    static int nblock = 0;	/* blocks currently held by Cha_path */

    /* malloc_free_block() takes the block size as an int.  The size is
     * converted explicitly because a size_t argument handed to a function
     * defined without a prototype is not converted by the compiler. */
    Cha_path = malloc_free_block((void *)Cha_path, &nblock,
				 (int)(sizeof(path_t) * CHA_PATH_NUM),
				 do_free);

    return Cha_path == NULL;
}

/*
 * malloc_free_mrph - resize the global morph buffer Cha_mrph
 *
 *   malloc_mrph()  enlarges Cha_mrph by MRPH_NUM entries
 *   free_mrph()    resets Cha_mrph to a single block of MRPH_NUM entries
 *
 * Returns non-zero when Cha_mrph is NULL afterwards (allocation failure),
 * zero otherwise.
 */
#define malloc_mrph()  malloc_free_mrph(0)
#define free_mrph()    malloc_free_mrph(1)
static int malloc_free_mrph(int do_free)
{
    static int nblock = 0;	/* blocks currently held by Cha_mrph */

    /* malloc_free_block() takes the block size as an int.  The size is
     * converted explicitly because a size_t argument handed to a function
     * defined without a prototype is not converted by the compiler. */
    Cha_mrph = malloc_free_block((void *)Cha_mrph, &nblock,
				 (int)(sizeof(mrph2_t) * MRPH_NUM),
				 do_free);

    return Cha_mrph == NULL;
}

#if 0
/***********************************************************************
 * check_code() - classify the character at the start of str
 *
 * NOTE: this function is compiled out by the enclosing "#if 0".
 *
 * Returns 0 at the end of the string (and, in the active "#if 1"
 * variant, also for a space, CR or LF), HANKAKU for any other byte
 * below 0x80, ILLEGAL when a byte >= 0x80 is followed by a byte below
 * 0x80, and otherwise one of the class codes PRIOD, CHOON, KIGOU, ALPH,
 * HIRAGANA, KATAKANA, GR or KANJI chosen from the 2-byte code.
 ***********************************************************************/
static int check_code(str)
    char *str;
{
    int	code;
    unsigned char *s = (unsigned char *)str;

    /* allow half-width spaces inside the input, by T.U. '96.01.10 */
#if 1
    if (*s == '\0' || *s == ' ' || *s == '\r' || *s == '\n')
      return 0;
#else
    if (*s == '\0')
      return 0;
#endif
    else if (*s < HANKAKU)
      return HANKAKU;
    else if (*(s+1) < HANKAKU)
      return ILLEGAL;

    /* 2-byte code: first byte is the high byte */
    code = *s * 256 + *(s + 1);

    /* the two exact matches come first, then the ranges in ascending order */
    if (code == PRIOD)        return PRIOD;
    else if (code == CHOON)   return CHOON;
    else if (code < KIGOU)    return KIGOU;
#if 0
    else if (code < SUJI)     return SUJI;
#endif
    else if (code < ALPH)     return ALPH;
    else if (code < HIRAGANA) return HIRAGANA;
    else if (code < KATAKANA) return KATAKANA;
    else if (code < GR)       return GR;
    else return KANJI;
}
#endif

#if 0
/***********************************************************************
 * undef_mrph_len - compute the byte length of an undefined word
 *
 * NOTE: this function is compiled out by the enclosing "#if 0".
 *
 * A character classified as HIRAGANA or KANJI: one character (2 bytes).
 * Anything else: the run of consecutive characters of the same class
 * (a CHOON may continue a KATAKANA run, a PRIOD may continue an ALPH run).
 ************************************************************************/
static int undef_mrph_len(target)
    char *target;
{
    int code, next_code;
    int len = 0;

    code = check_code(target);

    if (code == HIRAGANA || code == KANJI)
      return 2;

    do {
	/* HANKAKU and ILLEGAL advance by one byte, every other class by two */
	if (code == HANKAKU || code == ILLEGAL)
	  len++;
	else
	  len += 2;
	next_code = check_code(target + len);
    } while (next_code == code
	     || (code == KATAKANA && next_code == CHOON)
	     || (code == ALPH     && next_code == PRIOD));

    return len;
}
#endif

/***********************************************************************
 * register_undef_mrph1 - store one undefined-word entry in Cha_mrph
 *
 *   target    - start of the word inside the sentence; the entry points
 *               directly at this text (nothing is copied)
 *   mrph_idx  - slot of Cha_mrph to fill
 *   undef_len - length of the word in bytes
 *   no        - index into Cha_undef_info selecting the part of speech
 *               and connection table of the entry
 *
 * Returns TRUE on success, FALSE when the following slot would start a
 * new block of Cha_mrph and that block cannot be allocated.
 ***********************************************************************/
static int register_undef_mrph1(char *target, int mrph_idx, int undef_len, int no)
{
    mrph2_t *entry = Cha_mrph + mrph_idx;

    /* surface form: a window into the input text */
    entry->midasi = target;
    entry->length = undef_len;
    entry->base_length = entry->length;

    /* an undefined word carries no reading, base form or pronunciation */
    entry->yomi = "";
    entry->base = "";
    entry->pron = "";
    entry->comp = "\n";
    /* the additional information is an empty string */
    entry->info = "";

    /* grammatical attributes come from the undefined-word table */
    entry->hinsi = Cha_undef_info[no].hinsi;
    entry->con_tbl = Cha_undef_info[no].con_tbl;
    entry->ktype = 0;
    entry->kform = 0;
    /* 1-based index into Cha_undef_info marks the entry as undefined */
    entry->is_undef = no + 1;
    entry->weight = MRPH_DEFAULT_WEIGHT;

    /* make sure the next slot exists */
    mrph_idx++;
    if (mrph_idx % MRPH_NUM != 0)
	return TRUE;

    return malloc_mrph() ? FALSE : TRUE;
}

#if 0
/*
 * register_undef_mrph - store one undefined-word entry per Cha_undef_info
 * element, in consecutive slots starting at mrph_idx.
 *
 * NOTE: this function is compiled out by the enclosing "#if 0".
 *
 * Returns TRUE on success, FALSE as soon as register_undef_mrph1() fails.
 */
static int register_undef_mrph(target, mrph_idx, undef_len)
    char *target;
    int mrph_idx, undef_len;
{
    int no;

    for (no = 0; no < Cha_undef_info_num; no++)
      if (register_undef_mrph1(target, mrph_idx+no, undef_len, no) == FALSE)
	return FALSE;

    return TRUE;
}
#endif

/*
 * register_mrph - finalise the morph stored at Cha_mrph[mrph_idx],
 * expanding it into its conjugated forms where necessary
 *
 * Three cases are distinguished by the entry itself:
 *   - no conjugation type (ktype == 0): the entry is kept as it is;
 *   - conjugation type and form both set: the entry is kept, with its
 *     base length cleared and its reading and pronunciation emptied;
 *   - conjugation type set but no form: one entry is produced for every
 *     form of that type whose ending (gobi) is empty or matches the text
 *     that follows the stem.
 *
 * Return value:
 * If successful, this routine returns the number of morphs
 * added to the buffer. If an error occurs, it returns -1.
 */
static int register_mrph(int mrph_idx)
{
    int next_idx = mrph_idx;
    mrph2_t *cur = &Cha_mrph[mrph_idx];
    int form, ktype, stem_len, base_con_tbl, first_idx;
    char *tail;

    if (!cur->ktype || cur->kform) {
	/* exactly one entry results */
	if (cur->ktype) {
	    /* conjugated entry with its form already decided */
	    cur->base_length = 0;
	    cur->yomi = "";
	    cur->pron = "";
	}
	next_idx++;
	if (next_idx % MRPH_NUM == 0 && malloc_mrph())
	    return -1;
	return next_idx - mrph_idx;
    }

    /* stem only: try every form of the conjugation type */
    ktype = cur->ktype;
    stem_len = cur->length;
    base_con_tbl = cur->con_tbl;
    tail = cur->midasi + stem_len;
    first_idx = next_idx;

    for (form = 1; Cha_form[ktype][form].name; form++) {
	/* skip a form whose non-empty ending does not follow the stem */
	if (Cha_form[ktype][form].gobi[0] &&
	    (tail[0] != Cha_form[ktype][form].gobi[0] ||
	     memcmp(tail, Cha_form[ktype][form].gobi, Cha_form[ktype][form].gobi_len)))
	    continue;

	/* every entry after the first starts as a copy of the first one */
	if (next_idx != first_idx)
	    *cur = Cha_mrph[first_idx];
	cur->kform = form;
	cur->length = stem_len + Cha_form[ktype][form].gobi_len;
	cur->con_tbl = base_con_tbl + form - 1;

	next_idx++;
	if (next_idx % MRPH_NUM == 0 && malloc_mrph())
	    return -1;
	/* the buffer may have moved, so take the address again */
	cur = &Cha_mrph[next_idx];
    }

    return next_idx - mrph_idx;
}

/*
 * convert_mrphs - turn dictionary lookup results into Cha_mrph entries
 *
 *   target     - text position the lookup was made for
 *   dic_buffer - NULL-terminated array of dictionary records
 *   mrph_idx   - first free slot of Cha_mrph
 *
 * Every record is decoded with cha_get_mrph_data() and then passed
 * through register_mrph().
 *
 * Return value:
 * If successful, this routine returns the number of morphs
 * added to the buffer. If an error occurs, it returns -1.
 */
static int convert_mrphs(char *target, char **dic_buffer, int mrph_idx)
{
    int next_idx = mrph_idx;
    char **record = dic_buffer;

    while (*record) {
	int added;

	cha_get_mrph_data(&Cha_mrph[next_idx], *record, target);
	added = register_mrph(next_idx);
	if (added < 0)
	    return -1;
	next_idx += added;
	record++;
    }

    return next_idx - mrph_idx;
}

/*
 * collect_mrphs_for_pos()
 */
static int collect_mrphs_for_pos(pos, p_idx)
    int pos, *p_idx;
{
    static int p_start;
    int i, j;

    j = 0;
    if (pos == 0) {
	/* new sentence */
	p_idx[j++] = 0;
	p_start = 1;
    } else {
	for (i = p_start; i < Cha_path_num; i++) {
	    if (Cha_path[i].end <= pos) {
		if (i == p_start)
		  p_start++;
		if (Cha_path[i].end == pos)
		  p_idx[j++] = i;
	    }
	}
    }
    p_idx[j] = -1;

    return j;
}

/***********************************************************************
 * check_connect() - connect one morph to the paths that precede it
 *
 *   pos   - byte offset at which the morph starts
 *   m_num - index of the morph in Cha_mrph
 *   p_idx - indices into Cha_path of the candidate predecessor paths,
 *           terminated by a negative value
 *
 * Every predecessor is run through cha_check_automaton(); predecessors
 * for which it reports a connection cost of -1 are skipped.  The
 * remaining ones are grouped by the state the automaton returns, and one
 * new Cha_path entry is appended per group.  With Cha_cost_width < 0 a
 * group keeps only its cheapest predecessor; otherwise it keeps every
 * predecessor whose cost lies within Cha_cost_width of the group minimum.
 *
 * Returns TRUE on success (also when nothing could be connected) and
 * FALSE when Cha_path has to grow and malloc_path() fails.
 ***********************************************************************/
static int check_connect(pos, m_num, p_idx)
    int pos, m_num, *p_idx;
{
    /* candidate predecessors are grouped by the resulting state */
    typedef struct _path_cost_t {
	int min_cost;
	short min_cost_no;
	short state;
	short num;
	int   cost[PATH1_NUM];
	int   pno[PATH1_NUM];
    } path_cost_t;

/*
    static short best_start, best_end, best_state;
    static int best_cost;
*/
    static path_cost_t pcost[PATH1_NUM];
    int pcost_num;
    mrph2_t *new_mrph;
    int i, pno, pcostno;
    int	haba_cost, con_cost, cost, mrph_cost;
    int con_tbl, next_state;

#ifdef DEBUG
    printf("[m:%d] ", m_num);
#endif
    new_mrph = &Cha_mrph[m_num];
    con_tbl = new_mrph->con_tbl;

    pcost[0].state = -1;
    pcost_num = 0;

    for (i = 0; (pno = p_idx[i]) >= 0; i++) {
#if 0
	if (i>0&&pcost[0].min_cost<Cha_path[pno].cost)
	  continue;
#endif
	/* consult the automaton for the next state and the connection cost */
	next_state = cha_check_automaton
	  (Cha_path[pno].state,
#ifdef KOCHA
	   Cha_path[Cha_path[pno].path[0]].state,
	   Cha_mrph[Cha_path[pno].mrph_p].midasi,
	   Cha_mrph[Cha_path[pno].mrph_p].is_undef,
#endif
	   con_tbl, Cha_con_cost_undef, &con_cost);

	/* a connection cost of -1 means this predecessor is not used */
	if (con_cost == -1) continue;

#ifdef DEBUG
	/* NOTE(review): `cost` is printed here before it is assigned for
	 * this predecessor (the assignment follows below) */
	printf("[%3d, %3d, pos:%d, len:%d, state:%5d,%5d, cost:%d, undef:%d]\n",
	       Cha_path[pno].mrph_p,m_num,pos,new_mrph->length,Cha_path[pno].state,next_state,cost,new_mrph->is_undef);
#endif
	/* accumulated cost up to and including the connection */
	cost = Cha_path[pno].cost + con_cost * Cha_con_cost_weight;

	/* find the group (pcost entry) this state belongs to */
	for (pcostno = 0; pcostno < pcost_num; pcostno++)
	  if (next_state == pcost[pcostno].state)
	    break;
	if (pcostno < pcost_num) {
	    /* tricky: when Cha_cost_width is -1, ">-1" means ">=0" */
	    if (cost - pcost[pcostno].min_cost > Cha_cost_width)
	      continue;
	} else {
	    /* open a new group */
	    pcost_num++;
	    pcost[pcostno].num = 0;
	    pcost[pcostno].state = next_state;
	    pcost[pcostno].min_cost = INT_MAX;
	}

	/* record the predecessor in its group */
	if (Cha_cost_width < 0) {
	    /* best-only mode: the test above let through only a strictly
	     * cheaper predecessor, so it simply replaces the previous one */
	    pcost[pcostno].min_cost = cost;
	    pcost[pcostno].pno[0] = pno;
	} else {
	    pcost[pcostno].cost[pcost[pcostno].num] = cost;
	    pcost[pcostno].pno[pcost[pcostno].num] = pno;
	    if (cost < pcost[pcostno].min_cost) {
		pcost[pcostno].min_cost = cost;
		pcost[pcostno].min_cost_no = pcost[pcostno].num;
	    }
	    pcost[pcostno].num++;
	}
    }

    /* nothing connects: not an error */
    if (pcost_num == 0)
      return TRUE;

    /* cost of the morph itself */
    if (new_mrph->is_undef) {
	/* undefined word: base cost plus a step proportional to length / 2 */
	mrph_cost = Cha_undef_info[new_mrph->is_undef-1].cost
	  + Cha_undef_info[new_mrph->is_undef-1].cost_step * new_mrph->length / 2;
    } else {
	mrph_cost = Cha_hinsi[new_mrph->hinsi].cost;
    }
    mrph_cost *= new_mrph->weight * Cha_mrph_cost_weight;

#ifdef KOCHA
    /* surface form beginning with the bytes 0xA1 0xA1: one new path is
     * made for every candidate of the first group only */
    if (new_mrph->midasi &&
	(unsigned char)new_mrph->midasi[0] == 0xA1 &&
	(unsigned char)new_mrph->midasi[1] == 0xA1) {
	for (i = 0; i < pcost[0].num; i++) {
	    int *new_path;
	    Cha_path[Cha_path_num].path = malloc_int(2);
	    Cha_path[Cha_path_num].path[0] = pcost[0].pno[i];
	    Cha_path[Cha_path_num].path[1] = -1;

	    Cha_path[Cha_path_num].cost = pcost[0].cost[i] + mrph_cost;
	    Cha_path[Cha_path_num].mrph_p = m_num;
	    Cha_path[Cha_path_num].state = pcost[0].state;
	    Cha_path[Cha_path_num].start = pos;
	    Cha_path[Cha_path_num].end = pos + new_mrph->length;
#ifdef KOCHA_DEBUG
	    printf("[Sp:%d,prev:%d,m:%d,c:%d,pc:%d]\n",
		   Cha_path_num,Cha_path[Cha_path_num].path[0],m_num,pcost[0].cost[i],Cha_path[Cha_path_num].cost);
#endif
	    if (++Cha_path_num % CHA_PATH_NUM == 0 && malloc_path())
	      return FALSE;
	}
    } else {
#endif /* KOCHA */
#if 0
	if (Cha_path[Cha_path_num].end == pos + new_mrph->length &&
	    Cha_path[Cha_path_num].state == pcost[pcostno].state &&
	    Cha_path[Cha_path_num].cost > pcost[pcostno].min_cost + mrph_cost) {
	    return TRUE;
	}
#endif

	for (pcostno = 0; pcostno < pcost_num; pcostno++) {
	    /* pick the predecessors that lie within the cost width */
	    if (Cha_cost_width < 0) {
		Cha_path[Cha_path_num].path = malloc_int(2);
		Cha_path[Cha_path_num].path[0] = pcost[pcostno].pno[0];
		Cha_path[Cha_path_num].path[1] = -1;
	    } else {
		int npath = 0;
		int path[PATH1_NUM];
		haba_cost = pcost[pcostno].min_cost + Cha_cost_width;
		/* the cheapest predecessor always comes first */
		path[npath++] = pcost[pcostno].pno[pcost[pcostno].min_cost_no];
		for (i = 0; i < pcost[pcostno].num; i++)
		  if (pcost[pcostno].cost[i] <= haba_cost && i != pcost[pcostno].min_cost_no)
		    path[npath++] = pcost[pcostno].pno[i];
		path[npath++] = -1;
		memcpy(Cha_path[Cha_path_num].path = malloc_int(npath),
		       path, sizeof(int) * npath);
	    }

	    /* register the new entry in Cha_path */
	    Cha_path[Cha_path_num].cost = pcost[pcostno].min_cost + mrph_cost;
	    Cha_path[Cha_path_num].mrph_p = m_num;
	    Cha_path[Cha_path_num].state = pcost[pcostno].state;
	    Cha_path[Cha_path_num].start = pos;
	    Cha_path[Cha_path_num].end = pos + new_mrph->length;
#if 0
	    if (Cha_path[Cha_path_num].start < best_start ||
		Cha_path[Cha_path_num].end > best_end ||
		Cha_path[Cha_path_num].end == best_end &&
		Cha_path[Cha_path_num].state == best_state &&
		Cha_path[Cha_path_num].cost < best_cost) {
		best_start = Cha_path[Cha_path_num].start;
		best_end = Cha_path[Cha_path_num].end;
		best_state = Cha_path[Cha_path_num].state;
		best_cost = Cha_path[Cha_path_num].cost;
	    }
#endif
#if 0
	    printf("[%4d,%4d,%5d, %5d]\n",Cha_path[Cha_path_num].start,Cha_path[Cha_path_num].end,Cha_path[Cha_path_num].state,Cha_path[Cha_path_num].cost);
#endif
#ifdef DEBUG
	    /* NOTE(review): pcost[0].cost[i] uses group 0 and whatever value
	     * i was left with, not the group being registered */
	    printf("%3d %3d %5d [p:%d,prev:%d,m:%d,c:%d,pc:%d]\n",
		   Cha_path[Cha_path_num].start, Cha_path[Cha_path_num].end,
		   Cha_path[Cha_path_num].state,
		   Cha_path_num,Cha_path[Cha_path_num].path[0],m_num,pcost[0].cost[i],Cha_path[Cha_path_num].cost);
#endif
	    /* grow Cha_path when the next index starts a new block */
	    if (++Cha_path_num % CHA_PATH_NUM == 0 && malloc_path())
	      return FALSE;
	}
#ifdef KOCHA
    }
#endif
    return TRUE;
}

/*
 * set_mrph_end - fill *mrph with the entry used for both the beginning
 * and the end of a sentence: empty strings, part of speech 0, connection
 * table 0, no conjugation, not undefined, default weight, and a length
 * of 3.
 */
static void set_mrph_end(mrph2_t *mrph)
{
    char *empty = "";

    /* textual fields */
    mrph->info = empty;
    mrph->yomi = mrph->info;
    mrph->midasi = mrph->yomi;
    mrph->pron = empty;
    mrph->base = mrph->pron;
    mrph->comp = "\n";

    /* lengths */
    mrph->length = 3;
    mrph->base_length = mrph->length;

    /* grammatical fields */
    mrph->hinsi = 0;
    mrph->con_tbl = 0;
    mrph->ktype = 0;
    mrph->kform = 0;
    mrph->is_undef = 0;
    mrph->weight = MRPH_DEFAULT_WEIGHT;
}

/*
 * set_mrph_bkugiri - install the delimiter morphs at the front of Cha_mrph
 *
 * For every part of speech in Cha_hinsi that has a `bkugiri` string, one
 * morph is written to Cha_mrph, starting at index 1.  The morph uses that
 * string as surface form, reading, base form and pronunciation, and takes
 * its connection table from cha_check_table_for_undef().
 *
 * The entries are rewritten on every call.  Cha_mrph comes from malloc()
 * and is replaced by a fresh block whenever it had grown, and slot 1 is
 * reused for an ordinary morph when a sentence is parsed without
 * delimiters, so the contents of Cha_mrph[1] cannot be used to tell
 * whether the entries are still in place.
 *
 * Returns the number of delimiter morphs installed.
 */
static int set_mrph_bkugiri(void)
{
    int count = 0;
    int h;
    mrph2_t *mrph;

    for (h = 0; Cha_hinsi[h].name; h++) {
	if (!Cha_hinsi[h].bkugiri)
	    continue;
	mrph = &Cha_mrph[++count];
	/* all fields not set below (lengths, ktype, kform, is_undef,
	 * weight, ...) are left at zero */
	memset(mrph, 0, sizeof(mrph2_t));

	mrph->hinsi = h;
	mrph->con_tbl = cha_check_table_for_undef(h);
	mrph->midasi = mrph->yomi = mrph->base = mrph->pron = Cha_hinsi[h].bkugiri;
	mrph->info = "";
    }

    return count;
}

/*
 * strcmp_anno - test whether target begins with the opening string of an
 * annotation
 *
 * Cha_anno_info is searched from index 1 up to the first entry without an
 * opening string (str1); an entry matches when its first len1 bytes equal
 * the bytes at target.
 *
 * Returns the negated index of the first matching entry, or 0 when no
 * entry matches.
 */
static int strcmp_anno(target)
    char *target;
{
    int idx = 1;

    while (Cha_anno_info[idx].str1) {
	if (memcmp(target, Cha_anno_info[idx].str1, Cha_anno_info[idx].len1) == 0)
	    return -idx;
	idx++;
    }

    return 0;
}

/*
 * check_undefword_len - split the sentence into runs of characters of the
 * same kind
 *
 *   target        - the sentence
 *   undefword_len - output, target_len elements: at the offset where a
 *                   run starts, the length of that run in bytes; 0 at
 *                   every other offset
 *   char_type     - output, target_len + 1 elements, preset to 1 and
 *                   overwritten where a character starts, see `type`
 *   target_len    - length of the sentence in bytes
 *
 * type: -n(n=idx of Cha_anno_info) / 0(space) / 1(1byte) / 2(2byte) / 3(3byte)
 * stat: -n(n=idx of Cha_anno_info) / 0(space) / 1(e:[a-zA-Z],j:1byte)
 *       / 2(katakana) / 3(alphabet) / 4(otherwise)
 *
 * `stat` is the kind of the current character and `stat0` the kind of
 * the previous one; a run ends where the two differ.  A character of
 * kind 4 always forms a run of its own.  An annotation forms a single run
 * from its opening string to its closing string (or consists of the
 * opening string alone when there is no closing string).
 */
static void check_undefword_len(char *target, short *undefword_len,
				char *char_type, int target_len)
{
    unsigned char *t;
    short *ulen0, *ulen;	/* start of the current run / current offset */
    char *type;
    int stat0 = 1, stat = 0;
    anno_info *anno = NULL;

    memset(undefword_len, 0, target_len * sizeof(short));
    memset(char_type, 1, target_len + 1);

    t = (unsigned char *)target;
    ulen0 = ulen = undefword_len;
    type = char_type;

    while ((char *)t < target + target_len) {
	/* inside an annotation: it is over when it has no closing string
	 * or when the closing string ends right before the current byte */
	if (stat0 < 0 &&
	    (anno->len2 == 0 ||
	     ((char *)t - target > anno->len2 &&
	     !memcmp(t - anno->len2, anno->str2, anno->len2))))
	  stat0 = 99;
	if (stat0 < 0) {
	    /* still inside the annotation: stat keeps its value */
	    ;
	} else if (is_spc(*t)) {
	    *type = stat = 0;
	} else if ((stat = strcmp_anno((char *)t)) < 0) {
	    *type = stat;
	    anno = &Cha_anno_info[-stat];
	} else if (Cha_lang_e
		   ? ((*t>='a' && *t<='z') || (*t>='A' && *t<='Z'))
		   : !(*t & 0x80)) {
	    /* [a-zA-Z] when Cha_lang_e is set, any 7-bit byte otherwise */
	    stat = 1;
	} else if (/* zenkaku chou-on following katakana */
		   (stat0 == 2 && t[0] == 0xa1 && t[1] == 0xbc) ||
		   /* 0xa5a1-0xa5ff: zenkaku katakana */
		   (t[0] == 0xa5 && t[1] >= 0xa1 &&
		   !(stat0 != 2 &&
		     /* small aiueo,tsu,yayuyo,wa and chou-on cannot start
		      * a katakana run */
		     ((t[0] == 0xa5 &&
		       (t[1] == 0xa1 || t[1] == 0xa3 || t[1] == 0xa5 ||
			t[1] == 0xa7 || t[1] == 0xa9 || t[1] == 0xc3 ||
			t[1] == 0xe3 || t[1] == 0xe5 || t[1] == 0xe7 ||
			t[1] == 0xee)) ||
		      (t[0] == 0xa1 && t[1] == 0xbc))))) {
	    stat = 2;
	} else if (t[0] == 0xa3 && t[1] >= 0xc1) {
	    /* 0xa3c1-0xa3ff: zenkaku alphabet */
	    stat = 3;
	} else {
	    /* anything else; resetting stat0 forces a run boundary */
	    stat = 4;
	    stat0 = 99;
	}

	if (stat != stat0) {
	    /* close the previous run and start a new one here */
	    *ulen0 = ulen - ulen0;
	    ulen0 = ulen;
	}

	if (stat < 0 && stat != stat0) {
	    /* start of an annotation: step over the opening and closing
	     * strings, but never past the end of the sentence */
	    int len = anno->len1 + anno->len2;
	    if (len > target_len - ((char *)t - target))
	      len = target_len - ((char *)t - target);
	    t += len;
	    ulen += len;
	    type += len;
	} else if (t[0] == 0x8f && (t[1] & 0x80) && (t[2] & 0x80)) {
	    /* 3-byte character */
	    *type = 3;
	    t += 3; ulen += 3, type += 3;
	} else if ((t[0] & 0x80) && (t[1] & 0x80)) {
	    /* 2-byte character */
	    *type = 2;
	    t += 2; ulen += 2, type += 2;
	} else {
	    t++; ulen++, type++;
	}
	stat0 = stat;
    }

    /* close the last run */
    *ulen0 = ulen - ulen0;
}

/***********************************************************************
 * cha_parse_sentence() - analyse one sentence
 *
 *   target     - the sentence
 *   target_len - its length in bytes; should be > 0 and must be smaller
 *                than CHA_INPUT_SIZE
 *   opt_nobk   - non-zero: do not use the delimiter morphs
 *
 * The sentence is scanned from left to right.  At every position the
 * paths that end there are collected, the morphs found in the
 * dictionaries (and, where appropriate, undefined-word morphs) are stored
 * in Cha_mrph, and each of them is connected to the collected paths with
 * check_connect(), which appends the resulting paths to Cha_path.
 * Finally the end-of-sentence morph is connected.
 *
 * return value:
 *     0 - ok
 *     1 - no result / too many morphs (also: input too long, or a work
 *         buffer could not be allocated)
 ***********************************************************************/
int cha_parse_sentence(char *target, int target_len, int opt_nobk)
{
    int  i, dic_no;
    int  pos, pos_end;
    int  path_idx[PATH1_NUM], path_idx_num;
    int  mrph_idx, new_mrph_idx;
    int  undef_len;
    int  bkugiri_num = 0, bk;
    static int path0 = -1;
    char *dic_buffer[256];
    static short undefword_len[CHA_INPUT_SIZE];
    static char char_type[CHA_INPUT_SIZE];

    /* char_type needs target_len + 1 elements */
    if (target_len < 0 || target_len >= CHA_INPUT_SIZE)
	goto error_end;

    check_undefword_len(target, undefword_len, char_type, target_len);
    cha_set_sentence(target, undefword_len, char_type);

    /* back to the initial buffer sizes; without the two work buffers
     * nothing below can run */
    free_chars();
    if (free_path() || free_mrph())
	goto error_end;

    /* beginning of the sentence */
    Cha_path[0].start = Cha_path[0].end = 0;
    Cha_path[0].path = &path0;
    Cha_path[0].cost = 0;
    Cha_path[0].mrph_p = 0;
    Cha_path[0].state = 0;

    Cha_path_num = 1;
    set_mrph_end(&Cha_mrph[0]);
    if (!opt_nobk)
	bkugiri_num = set_mrph_bkugiri();
    new_mrph_idx = mrph_idx = bkugiri_num + 1;

    /*
     * Step: a 2-byte character other than a zenkaku alphabet character
     * (0xa3, >= 0xc1) advances by one character; everything else
     * advances by the whole run that starts at pos.
     */
    for (pos = pos_end = 0; pos < target_len;
	 pos += (char_type[pos] == 2 &&
		 !((unsigned char)target[pos] == 0xa3 &&
		   (unsigned char)target[pos+1] >= 0xc1))
	 ? 2 : undefword_len[pos],
	 pos_end = pos) {

	/* skip spaces and annotations; pos_end stays where the previous
	 * morphs ended */
	while (char_type[pos] <= 0)
	    pos += undefword_len[pos];
	if (pos == target_len)
	    break;

	path_idx_num = collect_mrphs_for_pos(pos_end, path_idx);
	if (path_idx_num == 0)
	    continue;

	for (bk = 0; bk < bkugiri_num; bk++) {
	    int path_num = Cha_path_num;

	    /* add the delimiter morph */
	    if (check_connect(pos, bk + 1, path_idx) == FALSE)
		goto error_end;
	    /* append the paths just created to path_idx, always leaving
	     * room for the terminator */
	    for (; path_num < Cha_path_num && path_idx_num < PATH1_NUM - 1;
		 path_num++)
		path_idx[path_idx_num++] = path_num;
	    path_idx[path_idx_num] = -1;
	}

	/* the patricia-tree dictionaries are searched only at a 2-byte
	 * character */
	if (char_type[pos] == 2) {
	    for (dic_no = 0; dic_no < Pat_ndicfile; dic_no++) {
		int nmrph;
		/* look the morphs up in the patricia tree */
		pat_search(Pat_dicfile[dic_no], target + pos,
			   dic_buffer);
		/* add them to Cha_mrph, expanding conjugations */
		nmrph = convert_mrphs(target + pos, dic_buffer,
				      new_mrph_idx);
		if (nmrph < 0)
		    goto error_end;
		new_mrph_idx += nmrph;
	    }
	}

	for (dic_no = 0; dic_no < Suf_ndicfile; dic_no++) {
	    int nmrph;
	    /* look the morphs up in the SUFARY file */
	    sa_common_prefix_search(Suf_dicfile[dic_no],
				    target + pos,
				    dic_buffer);
	    /* add them to Cha_mrph, expanding conjugations */
	    nmrph = convert_mrphs(target + pos, dic_buffer,
				  new_mrph_idx);
	    if (nmrph < 0)
		goto error_end;
	    new_mrph_idx += nmrph;
	}

	/* undefined word: the run that starts at pos */
	undef_len = undefword_len[pos];

	/* connect the dictionary morphs to the preceding paths */
	for (i = mrph_idx; i < new_mrph_idx; i++) {
	    /* no undefined word is added when the dictionary has a word
	     * of the same length */
	    if (Cha_con_cost_undef > 0 && Cha_mrph[i].length == undef_len)
		undef_len = 0;
	    if (check_connect(pos, i, path_idx) == FALSE)
		goto error_end;
	}

	/* add the undefined word, once per kind of undefined word */
	if (undef_len > 0) {
	    int no;
	    for (no = 0; no < Cha_undef_info_num; no++, new_mrph_idx++) {
		if (register_undef_mrph1(target + pos, new_mrph_idx,
					 undef_len, no) == FALSE)
		    goto error_end;
		if (check_connect(pos, new_mrph_idx, path_idx) == FALSE)
		    goto error_end;
	    }
	}

	mrph_idx = new_mrph_idx;
    }

    /* end of the sentence */
    set_mrph_end(&Cha_mrph[mrph_idx]);
    if (++mrph_idx % MRPH_NUM == 0 && malloc_mrph())
	goto error_end;

    collect_mrphs_for_pos(pos_end, path_idx);
    if (check_connect(pos, mrph_idx - 1, path_idx) == FALSE)
	goto error_end;

#ifdef DEBUG
    for (i = 1; i < mrph_idx - 1; i++) {
	printf("%4d: %4d ", i, Cha_mrph[i].con_tbl);
	print_mrph(0, &Cha_mrph[i], 'F', "%-11m %-11y %-11P3- %-14T  %F \n");
    }
#endif

    return 0;

  error_end:
    printf("Error: Too many morphs: %s\n", target);
    return 1;
}

