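/*
 * Builds the list of conversion candidates for a segment.
 * anthy_make_candidates() is the entry point called from the context code.
 *
 * Candidates are generated in the following ways:
 * (1) from the information the splitter attached to the segment,
 *     through proc_splitter_info()
 * (2) candidates made only of hiragana and only of katakana
 * (3) a forced candidate that keeps the last character as it is and
 *     turns the rest into katakana
 */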
/*
 * Funded by the IPA Exploratory Software Project (Mitou) 2001 9/30
 * Copyright (C) 2000-2004 TABATA Yusuke
 * Copyright (C) 2002 UGAWA Tomoharu
 *
 * $Id: compose.c,v 1.22 2002/11/17 14:45:47 yusuke Exp $
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <anthy.h> /* for ANTHY_*_ENCODING */
#include <conf.h>
#include <dic.h>
#include <splitter.h>
#include "main.h"

/* ͹ֹindex atoiǾ̤η夬ƤΤ
 * 0ɲä3⤷7index
 */
static void
make_zipcode_index(long long num, char *buf)
{
  const char *fmt = "";
  if (num < 10) {
    fmt = "00%d ";
  } else if (num < 100) {
    fmt = "0%d ";
  } else if (num < 1000) {
    fmt = "%d ";
  } else if (num < 10000) {
    fmt = "000%d ";
  } else if (num < 100000) {
    fmt = "00%d ";
  } else if (num < 1000000) {
    fmt = "0%d ";
  } else {
    fmt = "%d ";
  }
  sprintf(buf, fmt, (int)num);
}

/*
 * Look a zip code up in the dictionary file named by the "ZIPDICT_EUC"
 * configuration entry.
 *
 * xs  : the reading being converted; only lengths 3 and 7 are accepted.
 * num : numeric value of the reading; must lie within 1..9999999.
 *
 * The file is scanned line by line for the first line that starts with
 * the key built by make_zipcode_index().  The text following the last
 * space of that line is converted with anthy_cstr_to_xstr() as EUC-JP
 * and returned.  Returns NULL when the arguments are rejected, the file
 * cannot be opened, or no line matches.
 */
static xstr *
search_zipcode_dict(xstr *xs, long long num)
{
  FILE *fp;
  const char *path;
  char buf[1000];
  char index[30];
  size_t len;
  xstr *found = NULL;

  if (xs->len != 3 && xs->len != 7) {
    return NULL;
  }

  if (num > 9999999 || num < 1) {
    return NULL;
  }
  /* NOTE(review): presumably the lookup yields NULL when the entry is
   * not configured; fopen() must not be handed a null path. */
  path = anthy_conf_get_str("ZIPDICT_EUC");
  if (!path) {
    return NULL;
  }
  fp = fopen(path, "r");
  if (!fp) {
    return NULL;
  }
  make_zipcode_index(num, index);
  len = strlen(index);
  while (fgets(buf, sizeof(buf), fp)) {
    char *name;
    if (strncmp(buf, index, len) != 0) {
      continue;
    }
    /* Drop the line terminator only when there is one; the last line of
     * the file or an over-long line may come without it, and chopping
     * the final byte blindly would damage the name. */
    buf[strcspn(buf, "\n")] = '\0';
    /* The name is whatever follows the last space.  The key itself ends
     * with a space and has just been matched, so strrchr() cannot fail
     * here. */
    name = strrchr(buf, ' ');
    found = anthy_cstr_to_xstr(name + 1, ANTHY_EUC_JP_ENCODING);
    break;
  }
  fclose(fp);
  return found;
}

/* Allocate a candidate entry that holds no words yet: nr_words is 0,
 * elm is null and core_elm_index is -1.  All other members are left for
 * the caller to fill in.
 */
static struct cand_ent *
alloc_cand_ent(void)
{
  struct cand_ent *fresh = malloc(sizeof(*fresh));

  fresh->core_elm_index = -1;
  fresh->elm = 0;
  fresh->nr_words = 0;
  return fresh;
}
/*
 * Duplicate a candidate.  The string is obtained from
 * anthy_xstr_dup_str() and the element array is newly allocated and
 * filled element by element; the remaining members are copied as is.
 */
static struct cand_ent *
dup_candidate(struct cand_ent *ce)
{
  struct cand_ent *copy = alloc_cand_ent();
  int idx = 0;

  copy->si = ce->si;
  copy->flag = ce->flag;
  copy->core_elm_index = ce->core_elm_index;
  copy->nr_words = ce->nr_words;

  copy->str.str = anthy_xstr_dup_str(&ce->str);
  copy->str.len = ce->str.len;

  copy->elm = malloc(sizeof(struct cand_elm) * ce->nr_words);
  while (idx < ce->nr_words) {
    copy->elm[idx] = ce->elm[idx];
    idx++;
  }
  return copy;
}

/** Append a candidate to the candidate array of a segment.
 *
 * seg : segment whose `cands` array grows by one slot.
 * ce  : candidate to store; the segment keeps the pointer.
 *
 * The array is grown through a temporary pointer so that a failed
 * realloc() neither loses the existing array nor leaves nr_cands
 * pointing past it.  In that case the segment is left untouched and the
 * candidate is released instead of being stored.
 */
static void
push_back_candidate(struct seg_ent *seg, struct cand_ent *ce)
{
  struct cand_ent **grown;

  grown = realloc(seg->cands,
                  sizeof(struct cand_ent *) * (seg->nr_cands + 1));
  if (!grown) {
    /* NOTE(review): assumes anthy_release_cand_ent() is the right way to
     * dispose of a candidate that was never attached to a segment. */
    anthy_release_cand_ent(ce);
    return ;
  }
  grown[seg->nr_cands] = ce;
  seg->cands = grown;
  seg->nr_cands++;
}


/* Add a candidate taken from the zip code dictionary, if the segment
 * string parses as a number and the dictionary has an entry for it.
 */
static void
push_back_zipcode_candidate(struct seg_ent *seg)
{
  long long code = anthy_xstrtoll(&seg->str);
  xstr *place;
  struct cand_ent *ce;

  /* -1 is treated as "not a number" */
  if (code == -1) {
    return ;
  }

  place = search_zipcode_dict(&seg->str, code);
  if (place) {
    ce = alloc_cand_ent();
    ce->flag = CEF_SINGLEWORD;
    ce->si = 0;
    /* the candidate takes over the contents of *place ... */
    ce->str = *place;
    push_back_candidate(seg, ce);
    /* ... so only the xstr wrapper itself is freed here */
    free(place);
  }
}

/* Add a guessed candidate: when the last character of the segment has
 * the XCT_DEP type, convert the whole string with
 * anthy_xstr_hira_to_kata() and put the original last character back.
 * Segments shorter than two characters are ignored.
 */
static void
push_back_guessed_candidate(struct seg_ent *seg)
{
  struct cand_ent *guess;
  xstr *kata;
  xchar last;

  if (seg->str.len < 2) {
    return ;
  }

  /* look at the type of the final character */
  last = seg->str.str[seg->str.len - 1];
  if ((anthy_get_xchar_type(last) & XCT_DEP) == 0) {
    return ;
  }

  /* convert everything, then restore the final character */
  guess = alloc_cand_ent();
  kata = anthy_xstr_hira_to_kata(&seg->str);
  kata->str[kata->len - 1] = last;

  guess->si = 0;
  guess->flag = CEF_GUESS;
  guess->str.len = kata->len;
  guess->str.str = anthy_xstr_dup_str(kata);
  anthy_free_xstr(kata);

  push_back_candidate(seg, guess);
}

/** Recursively choose a dictionary entry for one word slot at a time
 * and push every completed combination onto seg as a candidate.
 *
 * seg  : segment receiving the candidates.
 * ce   : partially built candidate; ce->str holds the text produced for
 *        slots 0..n-1 and ce->si is the seg_info being expanded.
 * from : offset in seg->str.str where the reading of slot n starts.
 * n    : index of the word slot to fill in this call.
 *
 * Returns the number of candidates pushed by this call and the calls
 * below it.
 */
static int
enum_candidates(struct seg_ent *seg,
		struct cand_ent *ce,
		int from, int n)
{
  int i, p;
  struct cand_ent *cand;
  int nr_cands = 0;

  if (n == ce->si->nr_word_info) {
    /* every word slot has been filled */
    /* append the part of the segment that the seg_info does not cover;
     * note this appends to ce->str itself before ce is duplicated */
    xstr tail;
    tail.len = seg->len - ce->si->info_len;
    tail.str = &seg->str.str[ce->si->info_len];
    anthy_xstrcat(&ce->str, &tail);
    push_back_candidate(seg, dup_candidate(ce));
    return 1;
  }

  p = anthy_get_nr_dic_ents(ce->elm[n].se, &ce->elm[n].str);
  /* when the slot has no valid part of speech (POS_INVAL) or no
   * dictionary entries, copy its reading unconverted and move on to
   * the next slot */
  if (anthy_wtype_get_pos(ce->si->word_info[n].wt) == POS_INVAL ||
      p == 0) {
    xstr xs;
    xs.len = ce->si->word_info[n].len;
    xs.str = &seg->str.str[from];
    cand = dup_candidate(ce);
    /* -1 marks the slot as not backed by a dictionary entry */
    cand->elm[n].nth = -1;
    cand->elm[n].id = -1;
    anthy_xstrcat(&cand->str, &xs);
    nr_cands = enum_candidates(seg,cand,
			       from + ce->si->word_info[n].len,
			       n + 1);
    anthy_release_cand_ent(cand);
    return nr_cands;
  }

  /* the slot has a part of speech: try every dictionary entry and keep
   * those whose word type matches it */
  for (i = 0; i < p; i++) {
    wtype_t wt;
    /* NOTE(review): this passes &ce->str, whereas the entry count above
     * was queried with &ce->elm[n].str -- confirm which is intended */
    anthy_get_nth_dic_ent_wtype(ce->elm[n].se, &ce->str, i, &wt);
    /* set CT_NONE on the word type stored in the seg_info before
     * comparing; this writes to the shared seg_info on every pass */
    anthy_wtype_set_ct(&ce->si->word_info[n].wt, CT_NONE);
    if (anthy_wtypecmp(ce->si->word_info[n].wt, wt)) {
      xstr word, yomi;
      yomi.len = ce->si->word_info[n].len;
      yomi.str = &seg->str.str[from];
      cand = dup_candidate(ce);
      anthy_get_nth_dic_ent_str(cand->elm[n].se,
				&yomi, i, &word);
      cand->elm[n].nth = i;
      cand->elm[n].id = anthy_get_nth_dic_ent_id(ce->elm[n].se, i);
      /* append the word for this slot */
      anthy_xstrcat(&cand->str, &word);
      free(word.str);
      /* recurse to fill the remaining slots */
      nr_cands += enum_candidates(seg, cand, 
				  from + ce->si->word_info[n].len,
				  n+1);
      anthy_release_cand_ent(cand);
    }
  }
  return nr_cands;
}

/**
 * Add candidates that treat the whole segment string as one dictionary
 * word.  Only entries whose conjugation type is CT_SYUSI or CT_NONE are
 * used.
 */
static void
push_back_singleword_candidate(struct seg_ent *seg)
{
  seq_ent_t entry;
  struct cand_ent *ce;
  wtype_t wt;
  xstr word;
  int idx, total;

  entry = anthy_get_seq_ent_from_xstr(&seg->str);
  total = anthy_get_nr_dic_ents(entry, &seg->str);

  /* walk over every dictionary entry for this reading */
  for (idx = 0; idx < total; idx++) {
    int ct;

    /* fetch the word type and its conjugation type */
    anthy_get_nth_dic_ent_wtype(entry, &seg->str, idx, &wt);
    ct = anthy_wtype_get_ct(wt);
    if (ct != CT_SYUSI && ct != CT_NONE) {
      continue;
    }

    ce = alloc_cand_ent();
    anthy_get_nth_dic_ent_str(entry, &seg->str, idx, &word);
    /* the candidate takes over the string returned by the dictionary */
    ce->str.len = word.len;
    ce->str.str = word.str;
    ce->si = 0;
    ce->flag = CEF_SINGLEWORD;
    push_back_candidate(seg, ce);
  }
}

/* Add the two unconverted candidates: the segment string as it is
 * (CEF_HIRAGANA), followed by its anthy_xstr_hira_to_kata() form
 * (CEF_KATAKANA).
 */
static void
push_back_noconv_candidate(struct seg_ent *seg)
{
  struct cand_ent *plain;
  struct cand_ent *kata_ce;
  xstr *kata;

  /* first the string exactly as typed */
  plain = alloc_cand_ent();
  plain->flag = CEF_HIRAGANA;
  plain->si = 0;
  plain->str.len = seg->str.len;
  plain->str.str = anthy_xstr_dup_str(&seg->str);
  push_back_candidate(seg, plain);

  /* then the katakana form */
  kata_ce = alloc_cand_ent();
  kata = anthy_xstr_hira_to_kata(&seg->str);
  kata_ce->str.len = kata->len;
  kata_ce->str.str = anthy_xstr_dup_str(kata);
  kata_ce->si = 0;
  kata_ce->flag = CEF_KATAKANA;
  anthy_free_xstr(kata);
  push_back_candidate(seg, kata_ce);
}

/** seginfoξѤƸ */
static void
make_candidate_from_seginfo(struct seg_ent *se,
			    struct seg_info *si)
{
  /*
   * ñʻ줬ꤵ줿֤ǥߥåȤ롣
   */
  struct cand_ent *ce;
  int from, i;

  /* ʣ(1ޤ)ñǹʸñƤƤ */
  ce = alloc_cand_ent();
  ce->nr_words = si->nr_word_info;
  ce->str.str = 0;
  ce->str.len = 0;
  ce->elm = malloc(sizeof(struct cand_elm)*ce->nr_words);
  ce->si = si;
  from = 0;

  /* ʸι¤ξ˥ԡ */
  for (i = 0; i < si->nr_word_info; i++) {
    xstr core_xs;
    core_xs.str = &se->str.str[from];
    core_xs.len = si->word_info[i].len;
    ce->elm[i].se = anthy_get_seq_ent_from_xstr(&core_xs);
    ce->elm[i].str.str = core_xs.str;
    ce->elm[i].str.len = si->word_info[i].len;
    ce->elm[i].wt = si->word_info[i].wt;
    ce->elm[i].ratio = si->word_info[i].ratio;
    ce->core_elm_index = si->core_word_info_index;
    from += si->word_info[i].len;
  }
  ce->flag = CEF_NONE;
  enum_candidates(se, ce, 0, 0);
  anthy_release_cand_ent(ce);
}

/** Generate candidates from one piece of splitter information.
 *
 * SI_NORMAL is expanded through make_candidate_from_seginfo().
 * SI_CAND carries a ready-made candidate string, which is used directly
 * (flagged CEF_OCHAIRE).  Any other type produces nothing.
 */
static void
proc_splitter_info(struct seg_ent *se,
		   struct seg_info *si)
{
  struct cand_ent *ce;

  if (si->type == SI_NORMAL) {
    make_candidate_from_seginfo(se, si);
    return ;
  }
  if (si->type != SI_CAND) {
    return ;
  }

  /* the candidate string was given directly */
  ce = alloc_cand_ent();
  ce->str.str = anthy_xstr_dup_str(&si->cand);
  ce->str.len = si->cand.len;
  ce->flag = CEF_OCHAIRE;
  ce->si = si;

  if (se->len > si->info_len) {
    /* append the part of the segment the seg_info does not cover */
    xstr rest;
    rest.len = se->len - si->info_len;
    rest.str = &se->str.str[si->info_len];
    anthy_xstrcat(&ce->str, &rest);
  }
  push_back_candidate(se, ce);
}

/** context.cƽФäȤʪ
 * İʾθɬ
 */
void
anthy_make_candidates(struct seg_ent *se)
{
  int i, limit = 0;

  /* limit㤤scoreseginfoϸʤ */
  if (se->nr_seginfo) {
    limit = se->si[0]->score / 3;
  }
  for (i = 0; i < se->nr_seginfo; i++) {
    if (se->si[i]->score > limit) {
      /**/
      proc_splitter_info(se, se->si[i]);
    }
  }

  /* ñʤɤθ */
  push_back_singleword_candidate(se);
  /* ͹ֹ */
  push_back_zipcode_candidate(se);
  /* Ҥ餬ʡʤ̵Ѵȥ */
  push_back_noconv_candidate(se);

  /* 䤬Ĥ̵ȤϺǸ夬ˤǻĤ꤬ʿ̾θ뤫 */
  push_back_guessed_candidate(se);
}

/** Collect the seg_info entries that apply to a segment.
 *
 * ac : context whose split_info is queried.
 * se : segment to fill; se->si and se->nr_seginfo are written.
 *
 * Prefix lengths are tried from the full segment length downwards, and
 * the first length for which anthy_get_nr_seginfo() reports entries is
 * used.  When nothing is found (or the array cannot be allocated) the
 * segment is left with se->si == 0 and se->nr_seginfo == 0, so the two
 * members always agree.
 */
void
anthy_make_seginfo_array(struct anthy_context *ac,
			 struct seg_ent *se)
{
  int i;
  se->si = 0;
  /* reset the count as well: the loop below never runs for an empty
   * segment and would otherwise leave a stale value behind */
  se->nr_seginfo = 0;
  for (i = se->len; i > 0; i--) {
    int j;
    /* do not cut right before a character of type XCT_PART */
    if (i < se->len &&
	anthy_get_xchar_type(se->str.str[i]) & XCT_PART) {
      /* FIXME (kept from the original): together with the loop's own
       * decrement this also skips the next shorter length */
      i--;
      continue ;
    }
    se->nr_seginfo = anthy_get_nr_seginfo(&ac->split_info, se->from, i);
    if (!se->nr_seginfo) {
      continue ;
    }
    /* fetch the seg_info entries in order */
    se->si = malloc(sizeof(struct seg_info*) * se->nr_seginfo);
    if (!se->si) {
      /* no array: report no entries rather than a count without data */
      se->nr_seginfo = 0;
      return ;
    }
    for (j = 0; j < se->nr_seginfo; j++) {
      se->si[j] = anthy_get_nth_seginfo(&ac->split_info, se->from, i, j);
    }
    return ;
  }
}
