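/*
 * Treats one or more phrases or words, bundled together, as a "metaword".
 * This file generates the various kinds of metawords.
 *
 * init_metaword_tab() builds the information used for metaword processing
 * anthy_make_metaword_all() builds the metawords of a context
 *  -> creates the simple metawords
 *     anthy_make_syntactic_metaword() joins them by syntactic analysis
 * anthy_print_metaword() prints the given metaword
 *
 * Funded by IPA Exploratory Software Project 2001 10/29
 * Copyright (C) 2000-2003 TABATA Yusuke, UGAWA Tomoharu
 */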
#include <stdlib.h>
#include <stdio.h>

#include <record.h>
#include <splitter.h>
#include <xstr.h>
#include "wordborder.h"

/*
 * Describes how each kind of meta_word is handled, one row per kind.
 *
 * commit_metaword() reads anthy_metaword_type_tab[mw->type].score and adds
 * it to the metaword's score, so the table is indexed directly by the
 * metaword type value; in this table only the two MW_V_RENYOU_* rows carry
 * a non-zero bonus (100).
 *
 * NOTE(review): presumably the first column is the type itself and the rows
 * must therefore stay in the same order as enum metaword_type; the last
 * three columns look like segment-info, mark and check policies (going by
 * the MW_SEGINFO_*, MW_MARK_* and MW_CHECK_* names). Confirm against the
 * declaration of struct metaword_type_tab_.
 */
struct metaword_type_tab_ anthy_metaword_type_tab[] = {
  {MW_DUMMY,0,MW_SEGINFO_NONE,MW_MARK_WL,MW_CHECK_WL_STR},
  {MW_SINGLE,0,MW_SEGINFO_NONE,MW_MARK_WL,MW_CHECK_WL_SINGLE},
  {MW_WRAP,0,MW_SEGINFO_WRAPPED,MW_MARK_WRAP,MW_CHECK_WL_WRAP},
  {MW_NAMEPAIR,0,MW_SEGINFO_NONE,MW_MARK_PAIR,MW_CHECK_PAIR},
  {MW_V_RENYOU_A,100,MW_SEGINFO_COMBINED,MW_MARK_LEFT_WL,MW_CHECK_BORDER},
  {MW_V_RENYOU_NOUN,100,MW_SEGINFO_COMBINED,MW_MARK_LEFT_WL,MW_CHECK_BORDER},
  {MW_NUM_XX,0,MW_SEGINFO_NONE,MW_MARK_PAIR,MW_CHECK_PAIR},
  {MW_NOUN_NOUN_PREFIX,0,MW_SEGINFO_NONE,MW_MARK_PAIR,MW_CHECK_PAIR},
  {MW_OCHAIRE,0,MW_SEGINFO_NONE,MW_MARK_OCHAIRE,MW_CHECK_OCHAIRE},
  {MW_OCHAIRE_LEAF,0,MW_SEGINFO_OCHAIRE_LEAF,MW_MARK_OCHAIRE_LEAF,MW_CHECK_NONE},
  /**/
  {MW_SENTENCE,0,MW_SEGINFO_NONE,MW_MARK_PAIR,MW_CHECK_PAIR},
  {MW_MODIFIED,0,MW_SEGINFO_NONE,MW_MARK_PAIR,MW_CHECK_PAIR},
  {MW_END,0,MW_SEGINFO_NONE,MW_MARK_NONE,MW_CHECK_NONE}
};

/**
 * Find the extent that covers `len` characters starting at `from`.
 *
 * Extents are cached per start position in cnode->ex[], where slot
 * (len - 1) holds the extent of length `len`; cnode->max_len is the number
 * of slots currently available.
 *
 * @param sc    splitter context that owns the per-character nodes
 * @param from  index of the first character of the range
 * @param len   number of characters in the range
 * @param force when non-zero, a missing extent is created and cached
 * @return the extent, or NULL when `len` is not positive, when the extent
 *         does not exist and `force` is zero, or when memory for a new
 *         extent could not be obtained
 */
struct extent *
anthy_find_extent(struct splitter_context *sc,
		  int from, int len, int force)
{
  int i;
  struct char_node *cnode;
  struct extent *ex;

  /* a non-positive length has no slot: ex[len-1] would be out of bounds */
  if (len <= 0) {
    return NULL;
  }
  cnode = &sc->word_split_info->cnode[from];
  if (len <= cnode->max_len && cnode->ex[len-1]) {
    return cnode->ex[len-1];
  }
  if (!force) {
    return NULL;
  }
  /* grow the array of extent pointers so that slot len-1 exists */
  if (len > cnode->max_len) {
    struct extent **grown;

    /* keep the old array reachable if realloc fails */
    grown = realloc(cnode->ex, sizeof(*grown) * (size_t)len);
    if (!grown) {
      return NULL;
    }
    for (i = cnode->max_len; i < len; i++) {
      grown[i] = NULL;
    }
    cnode->ex = grown;
    cnode->max_len = len;
  }

  /* create and initialise the extent */
  ex = anthy_smalloc(sc->word_split_info->ExAllocator);
  if (!ex) {
    /* defensive: the allocator's failure behaviour is not visible here */
    return NULL;
  }
  cnode->ex[len-1] = ex;
  ex->mw = 0;
  ex->best = 0;
  ex->score = 0;
  ex->from = from;
  ex->len = len;
  ex->is_dummy_char = 0;
  ex->mw_count = 1;
  if (ex->len == 1) {
    xchar xc;
    xc = *sc->ce[ex->from].c;
    if (anthy_get_xchar_type(xc) == XCT_NONE) {
      /* a character whose type is XCT_NONE (belongs to no character
	 class) is flagged so that it can stand as a segment by itself */
      ex->is_dummy_char = 1;
    }
  }

  return ex;
}

/*
 * Register a metaword with the splitter context.
 *
 * The type's bonus from anthy_metaword_type_tab is added to mw->score, then
 * the metaword is pushed onto the front of two singly linked lists:
 *  - via mw->next, the list of metawords starting at character mw->from;
 *  - via mw->same_extent, the list of metawords covering exactly the same
 *    range (the extent is created on demand).
 *
 * anthy_find_extent() returns NULL for a zero-length range, and lengths can
 * come from learning records, so a metaword without an extent is dropped
 * instead of dereferencing NULL.
 */
static void
commit_metaword(struct splitter_context *sc,
		struct meta_word *mw)
{
  struct word_split_info_cache *info = sc->word_split_info;
  struct extent *ex = anthy_find_extent(sc, mw->from, mw->len, 1);

  if (!ex) {
    /* no extent available: leave both lists untouched */
    return;
  }

  mw->score += anthy_metaword_type_tab[mw->type].score;

  /* list of metawords sharing the same start position */
  mw->next = info->cnode[mw->from].mw;
  info->cnode[mw->from].mw = mw;

  /* list of metawords sharing the same range */
  mw->same_extent = ex->mw;
  ex->mw = mw;
  /* debugging switch: change to 1 to dump every committed metaword */
  if (0) {
    anthy_print_metaword(sc, mw);
  }
}

/*
 * Print one metaword to stdout, indented by `indent` spaces, followed by
 * its word list (if any) and, recursively, its sub-metawords one level
 * deeper.
 *
 * Each child is printed whenever it is present. Metawords built by
 * make_ochaire_metaword() link only mw1, so requiring both children would
 * hide those chains from the dump.
 */
static void
anthy_do_print_metaword(struct splitter_context *sc,
			struct meta_word *mw,
			int indent)
{
  int i;
  for (i = 0; i < indent; i++) {
    printf(" ");
  }
  /* the pair in parentheses is (from-len), not (from-to) */
  printf("*meta word type=%d(%d-%d)%d:score=%d*\n",
	 mw->type, mw->from, mw->len, mw->mw_count, mw->score);
  if (mw->wl) {
    anthy_print_word_list(sc, mw->wl);
  }
  if (mw->mw1) {
    anthy_do_print_metaword(sc, mw->mw1, indent + 1);
  }
  if (mw->mw2) {
    anthy_do_print_metaword(sc, mw->mw2, indent + 1);
  }
}

/*
 * Debug helper: dump `mw` (and whatever anthy_do_print_metaword() prints
 * below it) starting with no indentation.
 */
void
anthy_print_metaword(struct splitter_context *sc,
		     struct meta_word *mw)
{
  const int top_level_indent = 0;

  anthy_do_print_metaword(sc, mw, top_level_indent);
}

/*
 * Take a metaword from the context's MwAllocator and give it its default
 * state: type MW_SINGLE, mw_count 1, score 0, an initialised seg_class and
 * no word list, children, parent, candidate hint or si.
 *
 * from, len, next and same_extent are not set here; callers fill in
 * from/len and commit_metaword() sets the two list links.
 */
static struct meta_word *
alloc_metaword(struct splitter_context *sc)
{
  struct meta_word *fresh = anthy_smalloc(sc->word_split_info->MwAllocator);

  /* scalar defaults */
  fresh->type = MW_SINGLE;
  fresh->mw_count = 1;
  fresh->score = 0;
  anthy_init_segclass(&fresh->seg_class);

  /* nothing attached yet */
  fresh->wl = 0;
  fresh->mw1 = 0;
  fresh->mw2 = 0;
  fresh->parent = 0;
  fresh->cand_hint = 0;
  fresh->si = 0;

  return fresh;
}

/*
 * Commit one MW_SINGLE metaword for every word_list found at every
 * character position; range, score and seg_class are copied from the
 * word_list.
 */
static void
make_simple_metaword(struct splitter_context *sc)
{
  int pos = 0;

  while (pos < sc->char_count) {
    struct word_list *cur = sc->word_split_info->cnode[pos].wl;

    while (cur) {
      struct meta_word *single = alloc_metaword(sc);

      single->type = MW_SINGLE;
      single->wl = cur;
      single->from = cur->from;
      single->len = cur->len;
      single->score = cur->score;
      single->seg_class = cur->seg_class;
      commit_metaword(sc, single);

      cur = cur->next;
    }
    pos++;
  }
}

/*
 * Actually join two metawords into a new one of the given type and commit
 * it.
 *
 * The result starts where `mw` starts, spans both lengths, sums both
 * scores, keeps `mw` and `mw2` as its children and takes its seg_class
 * from `mw2` (the right-hand part). Returns the new metaword.
 */
struct meta_word *
anthy_do_combine_metaword(struct splitter_context *sc,
			  enum metaword_type type,
			  struct meta_word *mw, struct meta_word *mw2)
{
  struct meta_word *joined = alloc_metaword(sc);

  joined->type = type;
  joined->mw1 = mw;
  joined->mw2 = mw2;
  joined->from = mw->from;
  joined->len = mw->len + mw2->len;
  joined->score = mw->score + mw2->score;
  joined->seg_class = mw2->seg_class;

  commit_metaword(sc, joined);
  return joined;
}

/*
 * ưϢѷ + ƻ첽 ֡䤹פʤ
 */
static void
try_combine_v_renyou_a(struct splitter_context *sc,
		       struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t w2 = mw2->wl->part[PART_CORE].wt;

  if (mw->wl->part[mw->wl->last_part].pos == POS_V
      && mw->wl->part[mw->wl->last_part].ct == CT_RENYOU
      && anthy_wtype_get_pos(w2) == POS_A) {
    /* ƻǤϤΤǼΥå */
    if (anthy_get_seq_ent_wtype_freq(mw2->wl->part[PART_CORE].seq, 
				     anthy_wtype_a_tail_of_v_renyou)) {
      anthy_do_combine_metaword(sc, MW_V_RENYOU_A, mw, mw2);
    }
  }
}

/*
 * ưϢѷ + ̾첽(#D2T35)  (Τ)פʤ
 */
static void
try_combine_v_renyou_noun(struct splitter_context *sc,
			  struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t w2 = mw2->wl->part[PART_CORE].wt;
  if (mw->wl->part[mw->wl->last_part].pos == POS_V &&
      mw->wl->part[mw->wl->last_part].ct == CT_RENYOU &&
      anthy_wtype_get_pos(w2) == POS_NOUN &&
      anthy_wtype_get_scos(w2) == SCOS_T40) {
    anthy_do_combine_metaword(sc, MW_V_RENYOU_NOUN, mw, mw2);
  }
}


/*
 * ưϢѷ + ̾(#N2T35) ֥ ѡפʤ
 */
static void
try_combine_noun_noun_postfix(struct splitter_context *sc,
			      struct meta_word *mw, struct meta_word *mw2)
{
  wtype_t w1 = mw->wl->part[PART_CORE].wt;
  if (anthy_wtype_get_pos(w1) == POS_NOUN &&
      mw2->wl->part[PART_CORE].len > 1 &&
      anthy_get_seq_ent_wtype_freq(mw2->wl->part[PART_CORE].seq, 
				   anthy_wtype_noun_and_postfix)) {
    anthy_do_combine_metaword(sc, MW_NOUN_NOUN_PREFIX, mw, mw2);
  }
}

/*
 * Family name + first name.
 *
 * Joins the pair as MW_NAMEPAIR when the left core's entry is flagged
 * NF_FAMNAME and the right core's entry NF_FSTNAME, and in addition the
 * word types chosen for the two cores have scos SCOS_FAMNAME and
 * SCOS_FSTNAME respectively.
 */
static void
try_combine_name(struct splitter_context *sc,
		 struct meta_word *mw, struct meta_word *mw2)
{
  int left_flag = anthy_get_seq_flag(mw->wl->part[PART_CORE].seq);
  int right_flag = anthy_get_seq_flag(mw2->wl->part[PART_CORE].seq);

  /* the dictionary entries must allow the family/first name reading */
  if (!(left_flag & NF_FAMNAME) || !(right_flag & NF_FSTNAME)) {
    return;
  }
  /* ...and the selected word types must actually be those names */
  if (anthy_wtype_get_scos(mw->wl->part[PART_CORE].wt) != SCOS_FAMNAME) {
    return;
  }
  if (anthy_wtype_get_scos(mw2->wl->part[PART_CORE].wt) != SCOS_FSTNAME) {
    return;
  }
  anthy_do_combine_metaword(sc, MW_NAMEPAIR, mw, mw2);
}

/*
 * Number + number.
 *
 * Joins the pair as MW_NUM_XX when both cores are flagged NF_NUM, the left
 * core's entry has a non-zero frequency for anthy_wtype_n10 and the right
 * core's entry has one for anthy_wtype_n1.
 * NOTE(review): presumably n10/n1 mean a tens word followed by a ones
 * word -- confirm against the wtype definitions.
 */
static void
try_combine_10_1(struct splitter_context *sc,
		 struct meta_word *mw, struct meta_word *mw2)
{
  int left_flag = anthy_get_seq_flag(mw->wl->part[PART_CORE].seq);
  int right_flag = anthy_get_seq_flag(mw2->wl->part[PART_CORE].seq);

  if (!(left_flag & NF_NUM) || !(right_flag & NF_NUM)) {
    return;
  }
  if (!anthy_get_seq_ent_wtype_freq(mw->wl->part[PART_CORE].seq,
				    anthy_wtype_n10)) {
    return;
  }
  if (!anthy_get_seq_ent_wtype_freq(mw2->wl->part[PART_CORE].seq,
				    anthy_wtype_n1)) {
    return;
  }
  anthy_do_combine_metaword(sc, MW_NUM_XX, mw, mw2);
}

/*
 * Check whether `mw1` can be joined with its right-hand neighbour `mw2`
 * and run every pairing rule on them.
 *
 * Both metawords must carry a word list, and the right one must have an
 * empty prefix part (PART_PREFIX length 0); otherwise nothing is tried.
 */
static void
try_combine_metaword(struct splitter_context *sc,
		     struct meta_word *mw1, struct meta_word *mw2)
{
  /* the rules below all look into the word lists */
  if (mw1->wl == 0 || mw2->wl == 0) {
    return;
  }
  /* a following metaword that starts with a prefix cannot be attached */
  if (mw2->wl->part[PART_PREFIX].len != 0) {
    return;
  }

  try_combine_name(sc, mw1, mw2);
  try_combine_v_renyou_a(sc, mw1, mw2);
  try_combine_v_renyou_noun(sc, mw1, mw2);
  try_combine_noun_noun_postfix(sc, mw1, mw2);
  try_combine_10_1(sc, mw1, mw2);
}


/*
 * Try to join every metaword with every metaword that starts right after
 * it.
 *
 * For each start position, each metaword in that position's list is paired
 * with each metaword in the list of the position immediately following its
 * end, provided that position is still inside the text.
 */
static void
combine_metaword(struct splitter_context *sc)
{
  struct word_split_info_cache *info = sc->word_split_info;
  int left;

  /* loop over the left edge of the left metaword */
  for (left = 0; left < sc->char_count; left++) {
    struct meta_word *lhs;

    for (lhs = info->cnode[left].mw; lhs; lhs = lhs->next) {
      struct meta_word *rhs;

      /* nothing can follow a metaword that reaches the end of the text */
      if (lhs->len + left >= sc->char_count) {
	continue;
      }
      /* every metaword starting right after lhs is a candidate */
      for (rhs = info->cnode[lhs->len + left].mw; rhs; rhs = rhs->next) {
	try_combine_metaword(sc, lhs, rhs);
      }
    }
  }
}

/*
 * Commit an MW_DUMMY metaword covering `len` characters from `from`.
 *
 * Its score is derived from the best positive score among the metawords
 * that cover exactly `orig_len` characters from the same position (0 when
 * there is no such extent): 3 * best * len / orig_len.
 * `orig_len` must be non-zero; the only caller in this file passes a value
 * of at least 1.
 */
static void
make_dummy_metaword(struct splitter_context *sc, int from,
		    int len, int orig_len)
{
  struct extent *base = anthy_find_extent(sc, from, orig_len, 0);
  struct meta_word *dummy;
  int best = 0;

  if (base) {
    struct meta_word *cand = base->mw;

    while (cand) {
      if (best < cand->score) {
	best = cand->score;
      }
      cand = cand->same_extent;
    }
  }

  dummy = alloc_metaword(sc);
  dummy->type = MW_DUMMY;
  dummy->from = from;
  dummy->len = len;
  dummy->score = 3 * best * len / orig_len;
  commit_metaword(sc, dummy);
}

/*
 * Build dummy metawords from the "EXPANDPAIR" section of the record.
 *
 * Every substring of the text that does not reach the end of the text is
 * used as a key into that section. For each string value stored under a
 * matching key, if the text at the same start position begins with that
 * value, a dummy metaword of the value's length is committed (its score is
 * based on the extent of the key's length).
 * NOTE(review): presumably the section remembers segments the user
 * resized, keyed by the original segment -- confirm against the code that
 * writes EXPANDPAIR.
 */
static void
make_expanded_metaword_all(struct splitter_context *sc)
{
  int start;

  if (anthy_select_section("EXPANDPAIR", 0) == -1) {
    return;
  }

  for (start = 0; start < sc->char_count; start++) {
    int key_len;

    for (key_len = 1; key_len < sc->char_count - start; key_len++) {
      xstr key;
      int nr;
      int k;

      key.str = sc->ce[start].c;
      key.len = key_len;
      if (anthy_select_column(&key, 0) != 0) {
	/* nothing recorded for this substring */
	continue;
      }

      nr = anthy_get_nr_values();
      for (k = 0; k < nr; k++) {
	xstr *stored = anthy_get_nth_xstr(k);
	xstr head;

	/* skip missing values and values longer than the remaining text */
	if (!stored || stored->len > sc->char_count - start) {
	  continue;
	}
	head.str = sc->ce[start].c;
	head.len = stored->len;
	if (anthy_xstrcmp(&head, stored) == 0) {
	  make_dummy_metaword(sc, start, head.len, key_len);
	}
      }
    }
  }
}

/*
 * Build the metawords for one learned multi-segment entry ("ochaire"
 * learning) from the currently selected record column.
 *
 * Record layout as read here: value 0 is one more than the number of
 * segments used; for segment k, value (2k+1) is its length in characters
 * and string (2k+2) is stored as its candidate hint.
 *
 * One MW_OCHAIRE_LEAF metaword is created per segment; each leaf's mw1
 * points to the leaf of the segment to its right. Finally one MW_OCHAIRE
 * metaword covering all segments is committed with score OCHAIRE_SCORE and
 * mw1 pointing to the leftmost leaf.
 *
 * `from` is the text position of the first segment. `len` is not used by
 * this function.
 */
static void
make_ochaire_metaword(struct splitter_context *sc,
		      int from, int len)
{
  struct meta_word *mw, *mw0;
  int count;
  int s;
  int j;
  int seg_len;
  int mw_len = 0;

  /* number of segments (the stored value is one larger) */
  count = anthy_get_nth_value(0);
  count --;
  /* total number of characters of all segments except the rightmost one */
  for (s = 0, j = 0; j < count - 1; j++) {
    s += anthy_get_nth_value(j * 2 + 1);
  }
  /* metaword of the rightmost segment; it is committed later, either in
     the first pass of the loop below or right after the loop */
  seg_len = anthy_get_nth_value((count - 1) * 2 + 1);
  mw = alloc_metaword(sc);
  mw->type = MW_OCHAIRE_LEAF;
  mw->from = from + s;
  mw->len = seg_len;
  mw->cand_hint = anthy_get_nth_xstr((count - 1) * 2 + 2);
  mw_len += seg_len;
  /* metawords of the remaining segments, walking right to left;
     j-- steps from the rightmost segment index to the one before it */
  for (j-- ; j >= 0; j--) {
    struct meta_word *n;
    seg_len = anthy_get_nth_value(j * 2 + 1);
    /* s becomes the offset of segment j from `from` */
    s -= seg_len;
    n = alloc_metaword(sc);
    n->type = MW_OCHAIRE_LEAF;
    /* link to the metaword on the right */
    n->mw1 = mw;
    n->from = from + s;
    n->len = seg_len;
    n->cand_hint = anthy_get_nth_xstr(j * 2 + 2);
    /* commit the right-hand leaf, then continue with the new one */
    commit_metaword(sc, mw);
    mw = n;
    mw_len += seg_len;
  }
  /* commit the leftmost leaf */
  commit_metaword(sc, mw);
  /* add the metaword that covers the whole entry */
  mw0 = alloc_metaword(sc);
  mw0->type = MW_OCHAIRE;
  mw0->mw1 = mw;
  mw0->from = from;
  mw0->len = mw_len;
  mw0->score = OCHAIRE_SCORE;
  commit_metaword(sc, mw0);
}

/*
 * Look up learned multi-segment entries in the "OCHAIRE" section of the
 * record.
 *
 * Start positions are tried from left to right; at each one the remainder
 * of the text is passed to anthy_select_longest_column(). For the first
 * position that matches, the column is marked as used, the metawords for
 * that entry are built, and the search stops -- at most one entry is
 * applied per call.
 */
static void
make_ochaire_metaword_all(struct splitter_context *sc)
{
  int start;

  if (anthy_select_section("OCHAIRE", 0) == -1) {
    return;
  }

  for (start = 0; start < sc->char_count; start++) {
    xstr rest;
    xstr *key;
    int key_len;

    rest.str = sc->ce[start].c;
    rest.len = sc->char_count - start;
    if (anthy_select_longest_column(&rest) != 0) {
      continue;
    }

    anthy_mark_column_used();
    key = anthy_get_index_xstr();
    key_len = key->len;
    make_ochaire_metaword(sc, start, key_len);
    /* only the first hit is used */
    return;
  }
}

/*
 * Build a metaword that extends `mw` over the XCT_PART characters that
 * directly follow it.
 *
 * Counts the consecutive characters after the end of `mw` whose character
 * type has the XCT_PART bit set; if there is at least one, commits an
 * MW_DUMMY metaword that starts where `mw` starts, is longer by that
 * count, and is given the score of `mw`.
 */
static void
make_metaword_with_depchar(struct splitter_context *sc,
			   struct meta_word *mw)
{
  struct meta_word *extended;
  int extra = 0;

  /* count the trailing characters that cannot stand alone */
  while (mw->from + mw->len + extra < sc->char_count) {
    int pos = mw->from + mw->len + extra;

    if (!(anthy_get_xchar_type(*sc->ce[pos].c) & XCT_PART)) {
      break;
    }
    extra++;
  }
  if (extra <= 0) {
    return;
  }

  /* there were such characters: make the metaword that includes them */
  extended = alloc_metaword(sc);
  extended->type = MW_DUMMY;
  extended->from = mw->from;
  extended->len = mw->len + extra;
  extended->score = mw->score;
  commit_metaword(sc, extended);
}

/*
 * Run make_metaword_with_depchar() on every metaword of every start
 * position. Metawords committed during the walk are pushed onto the front
 * of the list being walked, so they are not visited again.
 */
static void 
make_metaword_with_depchar_all(struct splitter_context *sc)
{
  struct word_split_info_cache *info = sc->word_split_info;
  int pos = 0;

  while (pos < sc->char_count) {
    struct meta_word *cur = info->cnode[pos].mw;

    while (cur) {
      make_metaword_with_depchar(sc, cur);
      cur = cur->next;
    }
    pos++;
  }
}

/*
 * Penalise single-character segments: the score of every metaword that
 * sits in a one-character extent is divided by 10. Positions without such
 * an extent are left alone (extents are not created here).
 */
static void 
bias_to_single_char_metaword(struct splitter_context *sc)
{
  int pos;

  for (pos = 0; pos < sc->char_count; pos++) {
    struct extent *one_char = anthy_find_extent(sc, pos, 1, 0);
    struct meta_word *cur;

    if (one_char) {
      /* lower the score of everything in this one-character extent */
      for (cur = one_char->mw; cur; cur = cur->same_extent) {
	cur->score = cur->score / 10;
      }
    }
  }
}

/*
 * Build all metawords for the context by running the passes below in
 * order. Later passes walk the metaword lists and extents filled in by
 * earlier ones, so the order matters.
 */
void
anthy_make_metaword_all(struct splitter_context *sc)
{
  /* first, make one metaword per word_list */
  make_simple_metaword(sc);

  /* join adjacent metawords */
  combine_metaword(sc);

  /* handle segments recorded in the EXPANDPAIR section */
  make_expanded_metaword_all(sc);

  /* attach trailing XCT_PART characters (characters that cannot stand
     alone) to the metaword before them */
  make_metaword_with_depchar_all(sc);

  /* apply learned multi-segment entries (OCHAIRE section) */
  make_ochaire_metaword_all(sc);

  /* penalise single-character segments */
  bias_to_single_char_metaword(sc);

  /* defined outside this file; the file header describes it as joining
     metawords by syntactic analysis */
  anthy_make_syntactic_metaword(sc);
}
