/*
 * Handles relations between segments (context-based candidate reordering).
 * Copyright (C) 2006 Higashiyama Masahiko (thanks google summer of code program)
 * Copyright (C) 2002-2007 TABATA Yusuke
 *
 * anthy_reorder_candidates_by_relation()
 *
 */
/*
  This library is free software; you can redistribute it and/or
  modify it under the terms of the GNU Lesser General Public
  License as published by the Free Software Foundation; either
  version 2 of the License, or (at your option) any later version.

  This library is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with this library; if not, write to the Free Software
  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */
#if 0		/* Patched by G-HAL */
#include <arpa/inet.h>
#include <stdlib.h>

#include <anthy/segclass.h>
#include <anthy/segment.h>
#include <anthy/ordering.h>
#include <anthy/dic.h>
#include <anthy/diclib.h>
#include <anthy/feature_set.h>
#include <anthy/corpus.h>
#include "sorter.h"
#else
#if defined(HAVE_CONFIG_H)
# include "config.h"
#endif

#if defined(HAVE_STDLIB_H)
# include <stdlib.h>
#endif
#if !defined(__STDC_LIMIT_MACROS)
# define __STDC_LIMIT_MACROS
#endif
#if !defined(__STDC_CONSTANT_MACROS)
# define __STDC_CONSTANT_MACROS
#endif
#if defined(HAVE_STDINT_H)
# include <stdint.h>
#endif
#if !defined(__STDC_FORMAT_MACROS)
# define __STDC_FORMAT_MACROS
#endif
#if defined(HAVE_INTTYPES_H)
# include <inttypes.h>
#endif
#if defined(HAVE_ARPA_INET_H)
# include <arpa/inet.h>
#endif
#if defined(HAVE_SYS_TYPES_H)
# include <sys/types.h>
#endif
#if defined(HAVE_NETINET_IN_H)
# include <netinet/in.h>
#endif
#if defined(HAVE_ASSERT_H)
# include <assert.h>
#endif

#include "anthy/settings.h"	/* Patched by G-HAL, Wed,21 Oct,2008 */
#include "anthy/cand_ent_score.h"	/* Patched by G-HAL, Sat,17 Oct,2009 */
#include "anthy/segclass.h"
#include "anthy/segment.h"
#include "anthy/ordering.h"
#include "anthy/dic.h"
#include "anthy/diclib.h"
#include "anthy/feature_set.h"
#include "anthy/corpus.h"
#include "anthy/splitter.h"	/* Patched by G-HAL, Mon,12 Oct,2009 */
#include "sorter.h"
#include "src-splitter/wordborder.h"	/* Patched by G-HAL, Sun,18 Oct,2009 */
#include "src-ordering/ucdict.h"	/* Patched by G-HAL, Sat,17 Oct,2009 */
#include "src-worddic/dic_main.h"	/* Patched by G-HAL, Sat,31 Oct,2009 */
#include "src-splitter/cand_wt_name.h"		/* Patched by G-HAL, Sun,01 Nov,2009 */
#include "src-splitter/depgraph_type_sym.h"	/* Patched by G-HAL, Sun,01 Nov,2009 */
#endif

/* Number of consecutive hash-bucket slots find_first_pos() probes for a key. */
#define MAX_COLLISION 4
/* Iteration budget check_candidate_context() hands to find_first_from_corpus(). */
#define SEARCH_LIMIT 100
/* Number of elements in the id[] array of struct neighbor. */
#define MAX_NEIGHBOR 10


/* ʸѤΥѥ */
static struct corpus_ {
  /* header */
  void *corpus_bucket;
  void *corpus_array;
  /**/
  int *bucket;
  int *array;
  /**/
  int bucket_size;
  int array_size;
} corpus_info;

/* Iterator over the occurrences of one key in the corpus array. */
struct iterator {
  /* key being searched for and the current position (-1 when exhausted) */
  int key;
  int idx;
  /* remaining step budget; find_next_from_corpus() stops once it drops below 1 */
  int limit;
};

/* Small set of word ids collected around a word (see push_id()). */
struct neighbor {
  int nr;                /* number of valid entries in id[] */
  int id[MAX_NEIGHBOR];  /* collected ids, already masked by the callers */
};

#if 0	/* Patched by G-HAL, Mon,12 Oct,2009 */
/** Look in segment @seg for candidates whose core word is related to
 *  @from_word_id (per anthy_dic_check_word_relation()) and whose part of
 *  speech equals that of the first candidate; raise the score of each match.
 *  (Disabled: this region is inside "#if 0".)
 */
static void
reorder_candidate(int from_word_id, struct seg_ent *seg)
{
  int i, pos;
  struct cand_ent *ce;
  if (NULL == seg->cands) { /* no candidate array: nothing to do */
    return;
  }
  ce = seg->cands[0];
  if (ce->core_elm_index == -1) {
    return ;
  }
  /* part of speech of candidate 0 */
  pos = anthy_wtype_get_pos(ce->elm[ce->core_elm_index].wt);

  for (i = 0; i < seg->nr_cands; i++) {
    int word_id;
    ce = seg->cands[i];
    if (ce->core_elm_index == -1) {
      continue;
    }
    word_id = ce->elm[ce->core_elm_index].id;
    if (anthy_dic_check_word_relation(from_word_id, word_id) &&
	anthy_wtype_get_pos(ce->elm[ce->core_elm_index].wt) == pos) {
      /* relation matched: flag the candidate and update its score */
      ce->flag |= CEF_USEDICT;
     #if 0	/* Patched by G-HAL, Mon,13 Jul,2009, Sat,10 Oct,2009 */
      ce->score *= 10;
     #else
      ce->score = ce->score_tentative + (ce->score_base * 9);
     #endif
    }
  }
}

/* Return the id of the core element of candidate @nth in @seg, or -1 when
 * the segment has no candidate array or the candidate has no core element.
 * (Disabled: this region is inside "#if 0".)
 */
static int
get_indep_word_id(struct seg_ent *seg, int nth)
{
  struct cand_ent *ce;
  if (NULL == seg->cands) { /* no candidate array: nothing to look up */
    return -1;
  }
  if (seg->cands[nth]->core_elm_index == -1) {
    /* this candidate has no core element */
    return -1;
  }
  ce = seg->cands[nth];
  /* hand back the id stored on the core element */
  return ce->elm[ce->core_elm_index].id;
}

/* Reorder using word relations: take the core word of the first candidate of
 * segment @nth and run reorder_candidate() on the neighbouring segments.
 * (Disabled: this region is inside "#if 0".)
 */
static void
reorder_by_use_dict(struct segment_list *sl, int nth)
{
  int i;
  struct seg_ent *cur_seg;
  int word_id;

  cur_seg = anthy_get_nth_segment(sl, nth);
  word_id = get_indep_word_id(cur_seg, 0);
  if (word_id == -1) {
    /* no core word to use as the key */
    return ;
  }
  /* visit segments nth-2 .. nth+1, clipped to the list, skipping nth itself */
  for (i = nth - 2; i < nth + 2 && i < sl->nr_segments; i++) {
    struct seg_ent *target_seg;
    if (i < 0 || i == nth) {
      continue ;
    }
    /* apply the key word to segment i */
    target_seg = anthy_get_nth_segment(sl, i);
    reorder_candidate(word_id, target_seg);
  }
}
#else
/** Fetch the id stored on the core element of a candidate.
 *@param[in]		ce			candidate to inspect
 *@retval		-1			the candidate has no core element (core_elm_index < 0)
 *@return					id of ce->elm[ce->core_elm_index]
 *
 *@comment
 *	Patched by G-HAL
 *		Mon,12 Oct,2009
 */
static int get_indep_word_id_by_ce( const struct cand_ent* const ce )
{
  if (0 <= ce->core_elm_index) {
    /* the core index must address one of the candidate's elements */
    assert( ce->core_elm_index < ce->nr_words );
    return ce->elm[ce->core_elm_index].id;
  }
  return -1;
}

/** Fetch the core-element id of the nth candidate of a segment.
 *@param[in]		seg			segment to inspect
 *@param		nth			candidate index; asserted to be within [0, seg->nr_cands)
 *@retval		-1			seg has no candidate array, or the candidate has no core element
 *@return					id reported by get_indep_word_id_by_ce()
 *
 *@comment
 *	Patched by G-HAL
 *		Mon,12 Oct,2009
 */
static int get_indep_word_id( const struct seg_ent* const seg, int nth )
{
  if (seg->cands) {
    assert( (0 <= nth) && (nth < seg->nr_cands) );
    return get_indep_word_id_by_ce( seg->cands[nth] );
  }
  return -1;
}



# if 0	/* 㼭Patched by G-HAL, Sun,18 Oct,2009 */

/** Apply a word-relation key to one segment.
 *  (Disabled: this region is inside "# if 0".)
 *@param		from_word_id		id of the key word
 *@param[in]		seg			segment to examine
 *@param		decision_flag		when non-zero and the segment has no valid
 *						committed/provisional index, candidate 0 is
 *						treated as the fixed one
 *@retval		0			nothing matched
 *@retval		1			at least one relation matched
 *
 *@comment
 *	Patched by G-HAL
 *		Mon,13 Jul,2009
 *		Sat,10 Oct,2009
 *		Mon,12 Oct,2009
 */
static int reorder_candidate( int from_word_id, struct seg_ent* const seg, int decision_flag )
{
  int i;
  int committed;
  int hit = 0;
  if (NULL == seg->cands) {
    return 0;
  }

  /* pick committed, then provisional_committed, then the decision_flag default */
  committed = seg->committed;
  if ((committed < 0) || (seg->nr_cands <= committed)) {
    committed = seg->provisional_committed;
  }
  if ((committed < 0) || (seg->nr_cands <= committed)) {
    committed = decision_flag ? 0 : -1;
  }

  if (0 <= committed) {
    /* a fixed candidate exists: only test it, do not modify scores */
    struct cand_ent* const ce = seg->cands[committed];
    const int word_id = get_indep_word_id_by_ce( ce );
    if (-1 == word_id) {
      return 0;
    }
    if (anthy_dic_check_word_relation(from_word_id, word_id)) {
      return 1;
    }
    return 0;

  } else {
    /* no fixed candidate: boost every candidate that is related to the key */
    for (i = 0; i < seg->nr_cands; ++i) {
      struct cand_ent* const ce = seg->cands[i];
      const int word_id = get_indep_word_id_by_ce( ce );
      if (-1 == word_id) {
	continue;
      }

      if (anthy_dic_check_word_relation(from_word_id, word_id)) {
	ce->flag |= CEF_USEDICT;
	ce->score = ce->score_tentative + UDICT_SCORE;
	hit = 1;
      }
    }
  }
  return hit;
}

/** Word-relation check for one key candidate (inner loop).
 *  (Disabled: this region is inside "# if 0".)
 *@param[in,out]	sl			segment list
 *@param		start_nth		segments below this index are passed to
 *						reorder_candidate() with decision_flag set
 *@param		nth			index of the segment that owns ce (skipped in the scan)
 *@param[in,out]	ce			key candidate; boosted when any target segment matched
 *@param		start			first segment index to scan (inclusive)
 *@param		end			last segment index to scan (inclusive)
 *
 *@comment
 *	Patched by G-HAL
 *		Mon,12 Oct,2009
 */
static void reorder_by_use_dict_on_ce( struct segment_list* const sl, int start_nth, int nth, struct cand_ent* const ce, int start, int end )
{
  int j;
  int hit = 0;
  const int word_id = get_indep_word_id_by_ce( ce );
  if (-1 == word_id) {
    return;
  }
  if (0 == anthy_dic_search_word_relation_key(word_id)) {
    return;
  }

  /* scan the surrounding segments */
  for (j = start; j <= end; ++j) {
    if (j == nth) {
      continue;
    }
    { struct seg_ent* const target_seg = anthy_get_nth_segment( sl, j );
      const int ret = reorder_candidate( word_id, target_seg, (j < start_nth) );
      hit |= ret;
    }
  }
  if (hit) {
    ce->flag |= CEF_USEDICT;
    ce->score = ce->score_tentative + UDICT_SCORE;
  }
  return;
}

/** Word-relation check for one segment (outer loop).
 *  (Disabled: this region is inside "# if 0".)
 *@param[in,out]	sl			segment list
 *@param		start_nth		forwarded to reorder_by_use_dict_on_ce()
 *@param		nth			index of the segment whose candidates act as keys
 *
 *@comment
 *	Patched by G-HAL
 *		Mon,12 Oct,2009
 */
static void reorder_by_use_dict( struct segment_list* const sl, int start_nth, int nth )
{
  int committed;
  /* scan window nth-2 .. nth+2, clipped to the segment list */
  const int start_j = (0 <= (nth - 2))              ? (nth - 2) : 0;
  const int end_j   = ((nth + 2) < sl->nr_segments) ? (nth + 2) : (sl->nr_segments - 1);
  const struct seg_ent* const datum_seg = anthy_get_nth_segment( sl, nth );
  if (NULL == datum_seg->cands) {
    return;
  }
 #if 0
  committed = datum_seg->committed;
  if ((committed < 0) || (datum_seg->nr_cands <= committed)) {
    committed = datum_seg->provisional_committed;
  }
  if ((committed < 0) || (datum_seg->nr_cands <= committed)) {
    committed = (nth < start_nth) ? 0 : -1;
  }

  if (0 <= committed) {
    struct cand_ent* const ce = datum_seg->cands[committed];
    reorder_by_use_dict_on_ce( sl, start_nth, nth, ce, start_j, end_j );
  } else {
 #endif
    /* every candidate of the segment is tried as a key */
    int i;
    for (i = 0; i < datum_seg->nr_cands; ++i) {
      struct cand_ent* const ce = datum_seg->cands[i];
      reorder_by_use_dict_on_ce( sl, start_nth, nth, ce, start_j, end_j );
    }
 #if 0
  }
 #endif
  return;
}

# else	/* 㼭Patched by G-HAL, Sun,18 Oct,2009 */

/** Check whether a (left segment, right candidate) pair has a ucdic entry
 *  and, if so, adjust the score of the right candidate.
 *@param		distance		distance between the key segment and the target segment
 *						(used as a divisor of the priority)
 *@param[in]		left_seg		precomputed hash/word-type info of the left (key) side
 *@param[in,out]	right_ce		candidate on the right side; score and flag may be updated
 *@retval		INT_MIN			no entry applied
 *@return					signed priority that was applied
 *
 *@comment
 *	Patched by G-HAL
 *		Sun,18 Oct,2009 - Wed,21 Oct,2009
 *		Sat,31 Oct,2009 - Sun,01 Nov,2009
 *		Sat,07 Nov,2009
 *		Thu,12 Nov,2009 - Fri,13 Nov,2009
 *		Sun,29 Nov,2009
 */
static int reorder_by_ucdic_pair( int distance, const struct ucdic_seg_info* const left_seg, struct cand_ent* const right_ce )
{
  int score = INT_MIN;
  if (NR_PARTS != right_ce->nr_words) {
    return score;
  }
  /* base hash of the right side: postfix if present, else the core element, else PART_CORE */
  { const uint32_t right_id_base = (0 < right_ce->elm[PART_POSTFIX].str.len) ? right_ce->elm[PART_POSTFIX].uc_id_tmp : ((0 <= right_ce->core_elm_index) ? right_ce->elm[right_ce->core_elm_index].uc_id_tmp : right_ce->elm[PART_CORE].uc_id_tmp);
    /* look up the base pair; a zero result means no entry */
    int flag = anthy_dic_check_word_relation( left_seg->id_base_fin, anthy_hash_finalize(right_id_base) );
    if (0 == flag) {
      return score;
    }
    { /* interpret the flags returned for the base pair */
      int re_check = 0;
      uint32_t left_id  = left_seg->id_wo_dep;
      uint32_t right_id;
      const int seg1_deptype = flag & UCDICT_SEG1_NODE_DEPTYPE_FLAGS;
      const int seg2_deptype = flag & UCDICT_SEG2_NODE_DEPTYPE_FLAGS;

      /* entries flagged neighbor-only are rejected when distance exceeds 1 */
      if (UCDICT_NEIGHBOR_ONLY & flag) {
	if (1 < distance) {
	  return score;
	}
      }

      { /* dependent-word conditions for both sides */
	const struct cand_elm* const right_ce_elm_dep = &(right_ce->elm[PART_DEPWORD]);
	if (UCDICT_SEG1_NODE_NO_DEP == seg1_deptype) {
	  if (left_seg->have_dep) {
	    return score;
	  }
	} else if (UCDICT_SEG1_NODE_HAVE_DEP == seg1_deptype) {
	  if (!left_seg->have_dep) {
	    return score;
	  }
	  left_id = left_seg->id_w_dep;
	  re_check = 1;
	}
	if (UCDICT_SEG2_NODE_NO_DEP == seg2_deptype) {
	  if (right_ce_elm_dep && (0 < right_ce_elm_dep->str.len)) {
	    return score;
	  }
	  right_id = anthy_hash_uint32_update( right_id_base, '\0' );
	} else if (UCDICT_SEG2_NODE_HAVE_DEP == seg2_deptype) {
	  if ((NULL == right_ce_elm_dep) || (right_ce_elm_dep->str.len < 1)) {
	    return score;
	  }
	  right_id = anthy_hash_uint32_update( anthy_hash_xstr_update( right_id_base, &(right_ce_elm_dep->str) ), '\0' );
	  re_check = 1;
	} else {
	  right_id = anthy_hash_uint32_update( right_id_base, '\0' );
	}
      }

      { /* word-type conditions: each requested name is hashed in, and a '\0'
	 * separator is hashed after every slot whether or not it was requested */
	if (UCDICT_SEG1_NODE_HAVE_WT & flag) {
	  left_id = anthy_hash_str_update( left_id, left_seg->wt );
	  re_check = 1;
	}
	left_id = anthy_hash_uint8_update( left_id, '\0' );
	if (UCDICT_SEG1_NODE_HAVE_WT_H & flag) {
	  left_id = anthy_hash_str_update( left_id, left_seg->wt_h );
	  re_check = 1;
	}
	left_id = anthy_hash_uint8_update( left_id, '\0' );
	if (UCDICT_SEG1_NODE_HAVE_WT_C & flag) {
	  left_id = anthy_hash_str_update( left_id, left_seg->wt_c );
	  re_check = 1;
	}
	left_id = anthy_hash_uint8_update( left_id, '\0' );
	if (UCDICT_SEG1_NODE_HAVE_WT_S & flag) {
	  left_id = anthy_hash_str_update( left_id, left_seg->wt_s );
	  re_check = 1;
	}
	left_id = anthy_hash_uint8_update( left_id, '\0' );

	if (UCDICT_SEG2_NODE_HAVE_WT & flag) {
	  const char* wt_name;
	  if (0 != anthy_get_ce_wtname(right_ce, &wt_name)) {
	    return score;
	  }
	  right_id = anthy_hash_str_update( right_id, wt_name );
	  re_check = 1;
	}
	right_id = anthy_hash_uint8_update( right_id, '\0' );
	if (UCDICT_SEG2_NODE_HAVE_WT_H & flag) {
	  right_id = anthy_hash_str_update( right_id, anthy_depgraph_pos_class_sym(right_ce->mw) );
	  re_check = 1;
	}
	right_id = anthy_hash_uint8_update( right_id, '\0' );
	if (UCDICT_SEG2_NODE_HAVE_WT_C & flag) {
	  right_id = anthy_hash_str_update( right_id, anthy_depgraph_ct_class_sym(right_ce->mw) );
	  re_check = 1;
	}
	right_id = anthy_hash_uint8_update( right_id, '\0' );
	if (UCDICT_SEG2_NODE_HAVE_WT_S & flag) {
	  right_id = anthy_hash_str_update( right_id, anthy_depgraph_dep_class_sym(right_ce->mw) );
	  re_check = 1;
	}
	right_id = anthy_hash_uint8_update( right_id, '\0' );
      }

      if (re_check) {
	/* look up the extended pair; its flags replace those of the base pair */
	flag = anthy_dic_check_word_relation( anthy_hash_finalize(left_id), anthy_hash_finalize(right_id) );
	if (0 == flag) {
	  return score;
	}
	if ((UCDICT_SEG1_NODE_HAVE_DEP == seg1_deptype) && !(UCDICT_SEG1_LEAF_HAVE_DEP & flag)) {
	  return score;
	}
	if ((UCDICT_SEG2_NODE_HAVE_DEP == seg2_deptype) && !(UCDICT_SEG2_LEAF_HAVE_DEP & flag)) {
	  return score;
	}
      }
    }
    { /* apply: signed priority divided by distance, scaled by struct_score and the setting */
      const int hit_value   = ((UCDICT_PRIORITY_MINUS & flag) ? -1 : +1) * (UCDICT_PRIORITY_VALUE & flag) / distance;
      const int score_ratio = (right_ce->mw ? (right_ce->mw->struct_score) : (RATIO_BASE * RATIO_BASE)) * FREQ_RATIO / RATIO_BASE;
      const int add_score   = hit_value * score_ratio * anthy_settings.anthy_mode.candidate.ucdict_score_ratio;
      const int score_tmp   = right_ce->score_tentative + add_score;
      score = hit_value;
      /* overwrite only if the score is still the tentative one or the new value is larger */
      if ((right_ce->score == right_ce->score_tentative) || (right_ce->score < score_tmp)) {
	#if defined(DEBUG) && (3 <= DEBUG)
	anthy_log(1, ((0 <= add_score) ? "TRACE: ucdic();?-?:?-?:++\n" : "TRACE: ucdic();?-?:?-?:--\n") );
	#endif
	/* the stored score is never allowed below 1 */
	right_ce->score = (1 < score_tmp) ? score_tmp : 1;
	right_ce->flag |= CEF_USEDICT;
      }
    }
  }
  return score;
}

/** Apply a single-segment ucdic entry (the right-hand id of the lookup is 0)
 *  to the key candidate itself.
 *@param[in]		left_seg		precomputed hash/word-type info of the key candidate
 *@param[in,out]	left_ce			key candidate; score and flag may be updated
 *@retval		INT_MIN			no entry applied
 *@return					signed priority that was applied
 *
 *@comment
 *	Patched by G-HAL
 *		Thu,12 Nov,2009 - Sat,14 Nov,2009
 */
static int reorder_by_ucdic_single( const struct ucdic_seg_info* const left_seg, struct cand_ent* const left_ce )
{
  int score = INT_MIN;
  /* look up the base key; a zero result means no entry */
  int flag = anthy_dic_check_word_relation( left_seg->id_base_fin, 0 );
  if (0 == flag) {
    return score;
  }
  { /* interpret the flags returned for the base key */
    int re_check = 0;
    uint32_t left_id  = left_seg->id_wo_dep;
    const int seg1_deptype = flag & UCDICT_SEG1_NODE_DEPTYPE_FLAGS;

    { /* dependent-word condition */
      if (UCDICT_SEG1_NODE_NO_DEP == seg1_deptype) {
	if (left_seg->have_dep) {
	  return score;
	}
      } else if (UCDICT_SEG1_NODE_HAVE_DEP == seg1_deptype) {
	if (!left_seg->have_dep) {
	  return score;
	}
	left_id = left_seg->id_w_dep;
	re_check = 1;
      }
    }

    { /* word-type conditions: each requested name is hashed in, and a '\0'
       * separator is hashed after every slot whether or not it was requested */
      if (UCDICT_SEG1_NODE_HAVE_WT & flag) {
	left_id = anthy_hash_str_update( left_id, left_seg->wt );
	re_check = 1;
      }
      left_id = anthy_hash_uint8_update( left_id, '\0' );
      if (UCDICT_SEG1_NODE_HAVE_WT_H & flag) {
	left_id = anthy_hash_str_update( left_id, left_seg->wt_h );
	re_check = 1;
      }
      left_id = anthy_hash_uint8_update( left_id, '\0' );
      if (UCDICT_SEG1_NODE_HAVE_WT_C & flag) {
	left_id = anthy_hash_str_update( left_id, left_seg->wt_c );
	re_check = 1;
      }
      left_id = anthy_hash_uint8_update( left_id, '\0' );
      if (UCDICT_SEG1_NODE_HAVE_WT_S & flag) {
	left_id = anthy_hash_str_update( left_id, left_seg->wt_s );
	re_check = 1;
      }
      left_id = anthy_hash_uint8_update( left_id, '\0' );
    }

    if (re_check) {
      /* look up the extended key; its flags replace those of the base key */
      flag = anthy_dic_check_word_relation( anthy_hash_finalize(left_id), 0 );
      if (0 == flag) {
	return score;
      }
      if ((UCDICT_SEG1_NODE_HAVE_DEP == seg1_deptype) && !(UCDICT_SEG1_LEAF_HAVE_DEP & flag)) {
	return score;
      }
    }
    { /* apply: signed priority scaled by struct_score and the setting */
      const int hit_value   = ((UCDICT_PRIORITY_MINUS & flag) ? -1 : +1) * (UCDICT_PRIORITY_VALUE & flag);
      const int score_ratio = (left_ce->mw ? (left_ce->mw->struct_score) : (RATIO_BASE * RATIO_BASE)) * FREQ_RATIO / RATIO_BASE;
      const int add_score   = hit_value * score_ratio * anthy_settings.anthy_mode.candidate.ucdict_score_ratio;
      const int score_tmp   = left_ce->score_tentative + add_score;
      score = hit_value;
      /* overwrite only if the score is still the tentative one or the new value is larger */
      if ((left_ce->score == left_ce->score_tentative) || (left_ce->score < score_tmp)) {
       #if defined(DEBUG) && (3 <= DEBUG)
	anthy_log(1, ((0 <= add_score) ? "TRACE: ucdic();?-?::++\n" : "TRACE: ucdic();?-?::--\n") );
       #endif
	/* the stored score is never allowed below 1 */
	left_ce->score = (1 < score_tmp) ? score_tmp : 1;
	left_ce->flag |= CEF_USEDICT;
      }
    }
  }
  return score;
}

/** Apply the left-hand key to the candidates of one target segment.
 *@param		distance		distance between the key segment and the target segment
 *@param[in]		left_seg		info of the left (key) side
 *@param[in,out]	seg2			target segment
 *@retval		INT_MIN			nothing applied (also when seg2 has no candidate array)
 *@return					result of reorder_by_ucdic_pair() for the fixed candidate,
 *						or the largest result over all candidates when none is fixed
 *
 *@comment
 *	Patched by G-HAL
 *		Sun,18 Oct,2009
 *		Sun,01 Nov,2009
 *		Sat,07 Nov,2009
 *		Thu,12 Nov,2009 - Fri,13 Nov,2009
 */
static int reorder_by_ucdic_on_seg2( int distance, const struct ucdic_seg_info* const left_seg, struct seg_ent* const seg2 )
{
  int best = INT_MIN;
  int fixed;
  int n;

  if (!seg2->cands) {
    return best;
  }

  /* prefer the committed index, then the provisional one; -1 means "none" */
  fixed = seg2->committed;
  if (!((0 <= fixed) && (fixed < seg2->nr_cands))) {
    fixed = seg2->provisional_committed;
    if (!((0 <= fixed) && (fixed < seg2->nr_cands))) {
      fixed = -1;
    }
  }

  if (fixed >= 0) {
    /* a fixed candidate exists: only that one is examined */
   #if defined(DEBUG) && (3 <= DEBUG)
    anthy_log(1, "TRACE: ucdic();?-?:?=%d:\n", fixed );
   #endif
    return reorder_by_ucdic_pair( distance, left_seg, seg2->cands[fixed] );
  }

  /* otherwise try every candidate and keep the largest result */
 #if defined(DEBUG) && (3 <= DEBUG)
  anthy_log(1, "TRACE: ucdic();?-?:?-?:\n" );
 #endif
  for (n = 0; n < seg2->nr_cands; ++n) {
    int ret;
   #if defined(DEBUG) && (3 <= DEBUG)
    anthy_log(1, "TRACE: ucdic();?-?:?-%d:\n", n );
   #endif
    ret = reorder_by_ucdic_pair( distance, left_seg, seg2->cands[n] );
    if (ret > best) {
      best = ret;
    }
  }
  return best;
}

/** Apply one left-hand key to every segment of a range.
 *@param[in,out]	sl			segment list
 *@param[in]		left_seg		info of the left (key) side
 *@param		ce_num			segment index of the key; that segment is skipped and
 *						(j - ce_num) is passed on as the distance
 *@param		start_j			first target segment index (inclusive)
 *@param		end_j			last target segment index (inclusive)
 *@retval		INT_MIN			nothing applied
 *@return					largest value returned by reorder_by_ucdic_on_seg2()
 *
 *@comment
 *	Patched by G-HAL
 *		Sun,18 Oct,2009 - Tue,20 Oct,2009
 *		Sun,01 Nov,2009
 *		Sat,07 Nov,2009
 *		Thu,12 Nov,2009 - Fri,13 Nov,2009
 *		Mon,25 Oct,2010
 */
static int reorder_by_ucdic_with_seg1( struct segment_list* const sl, const struct ucdic_seg_info* const left_seg, int ce_num, int start_j, int end_j )
{
  int best = INT_MIN;
  int seg_no;
  for (seg_no = start_j; seg_no <= end_j; ++seg_no) {
    if (seg_no != ce_num) {
      struct seg_ent* const target = anthy_get_nth_segment( sl, seg_no );
      const int ret = reorder_by_ucdic_on_seg2( (seg_no - ce_num), left_seg, target );
      if (ret > best) {
	best = ret;
      }
    }
  }
  return best;
}

/** Use one candidate as the left-hand key: build its ucdic_seg_info, apply
 *  the single-segment entry to it, then apply it to the target segments.
 *@param[in,out]	sl			segment list
 *@param		ce_num			segment index of the key candidate
 *@param[in]		ce			key candidate
 *@param		start_j			first target segment index (inclusive)
 *@param		end_j			last target segment index (inclusive)
 *@retval		INT_MIN			nothing applied
 *@return					larger of the single-segment result and the
 *						result over the target segments
 *
 *@comment
 *	Patched by G-HAL
 *		Sun,18 Oct,2009 - Tue,20 Oct,2009
 *		Sun,01 Nov,2009
 *		Sat,07 Nov,2009
 *		Thu,12 Nov,2009 - Sat,14 Nov,2009
 *		Sun,29 Nov,2009
 */
static int reorder_by_ucdic_with_seg1ce( struct segment_list* const sl, int ce_num, struct cand_ent* const ce, int start_j, int end_j )
{
  struct ucdic_seg_info left_seg;

  if (NR_PARTS != ce->nr_words) {
    return INT_MIN;
  }

  /* base hash: postfix if present, else the core element, else PART_CORE */
  if (0 < ce->elm[PART_POSTFIX].str.len) {
    left_seg.id_base = ce->elm[PART_POSTFIX].uc_id_tmp;
  } else if (0 <= ce->core_elm_index) {
    left_seg.id_base = ce->elm[ce->core_elm_index].uc_id_tmp;
  } else {
    left_seg.id_base = ce->elm[PART_CORE].uc_id_tmp;
  }
  left_seg.id_base_fin = anthy_hash_finalize( left_seg.id_base );

  /* stop early when the dictionary has no entry keyed by this word */
  if (0 == anthy_dic_search_word_relation_key(left_seg.id_base_fin)) {
    return INT_MIN;
  }

  /* hashes without and with the dependent word */
  left_seg.have_dep  = (0 < ce->elm[PART_DEPWORD].str.len);
  left_seg.id_wo_dep = anthy_hash_uint32_update( left_seg.id_base, '\0' );
  if (left_seg.have_dep) {
    left_seg.id_w_dep = anthy_hash_uint32_update( anthy_hash_xstr_update( left_seg.id_base, &(ce->elm[PART_DEPWORD].str) ), '\0');
  } else {
    left_seg.id_w_dep = left_seg.id_wo_dep;
  }

  /* word-type names used by the extended lookups */
  if (0 != anthy_get_ce_wtname(ce, &(left_seg.wt))) {
    return INT_MIN;
  }
  left_seg.wt_h = anthy_depgraph_pos_class_sym( ce->mw );
  left_seg.wt_c = anthy_depgraph_ct_class_sym( ce->mw );
  left_seg.wt_s = anthy_depgraph_dep_class_sym( ce->mw );

  {
    const int own    = reorder_by_ucdic_single( &left_seg, ce );
    const int others = reorder_by_ucdic_with_seg1( sl, &left_seg, ce_num, start_j, end_j );
    return (own < others) ? others : own;
  }
}

/** Apply the ucdic dictionary to the candidates of a segment list.
 *  Does nothing when the ucdict_score_ratio setting is 0.
 *@param[in,out]	sc			splitter context; its prev_commit.seg_ucinfo is used as
 *						the key for the pseudo segment index -1
 *@param[in,out]	sl			segment list
 *@param		start_nth		segments below this index contribute only one key
 *						candidate (committed, else provisional, else 0)
 *
 *@comment
 *	Patched by G-HAL
 *		Sun,18 Oct,2009 - Wed,21 Oct,2009
 *		Sat,31 Oct,2009 - Sun,01 Nov,2009
 *		Tue,10 Nov,2009
 *		Thu,12 Nov,2009 - Sat,14 Nov,2009
 */
void anthy_reorder_candidates_by_ucdic( struct splitter_context* const sc, struct segment_list* const sl, int start_nth )
{
  const int last_seg = (sl->nr_segments - 1);
  int seg_no = start_nth - UCDICT_SEG_DISTANCE_MAX;

  /* a start before the first segment collapses to the pseudo index -1 */
  if (seg_no < 0) {
    seg_no = -1;
  }
  if (0 == anthy_settings.anthy_mode.candidate.ucdict_score_ratio) {
    return;
  }

  for (; seg_no <= last_seg; ++seg_no) {
    const struct seg_ent* key_seg;
    /* target window [first, last] relative to the key, clipped to the last segment */
    int first = seg_no + UCDICT_SEG_DISTANCE_MIN;
    int last  = seg_no + UCDICT_SEG_DISTANCE_MAX;
    if (!(first < sl->nr_segments)) {
      first = sl->nr_segments - 1;
    }
    if (!(last < sl->nr_segments)) {
      last = sl->nr_segments - 1;
    }

    if (seg_no < 0) {
      /* key taken from the previous commit, only when the setting allows it */
      if (anthy_settings.anthy_mode.keepalive.enable_refer_ucdic_for_candidate) {
	const struct ucdic_seg_info* const prev = sc->prev_commit.seg_ucinfo;
	if (prev) {
	  reorder_by_ucdic_with_seg1( sl, prev, seg_no, ((first < 0) ? 0 : first), ((last < 0) ? 0 : last) );
	}
      }
      continue;
    }

    key_seg = anthy_get_nth_segment( sl, seg_no );
    if (NULL == key_seg->cands) {
      continue;
    }

    if (seg_no < start_nth) {
      /* segment before start_nth: a single key candidate */
      int fixed = key_seg->committed;
      if ((fixed < 0) || (key_seg->nr_cands <= fixed)) {
	fixed = key_seg->provisional_committed;
      }
      if ((fixed < 0) || (key_seg->nr_cands <= fixed)) {
	fixed = 0;
      }
     #if defined(DEBUG) && (3 <= DEBUG)
      anthy_log(1, "TRACE: ucdic();%d=%d:\n", seg_no, fixed );
     #endif
      reorder_by_ucdic_with_seg1ce( sl, seg_no, key_seg->cands[fixed], first, last );
      continue;
    }

    /* segment at or after start_nth: every candidate acts as a key, and a
     * key that produced a hit is boosted itself */
    {
      int k;
     #if defined(DEBUG) && (3 <= DEBUG)
      anthy_log(1, "TRACE: ucdic();%d-?:\n", seg_no );
     #endif
      for (k = 0; k < key_seg->nr_cands; ++k) {
	struct cand_ent* const ce = key_seg->cands[k];
	const int hit_value = reorder_by_ucdic_with_seg1ce( sl, seg_no, ce, first, last );
	if (INT_MIN == hit_value) {
	  continue;
	}
	{
	  const int score_ratio = (ce->mw ? (ce->mw->struct_score) : (RATIO_BASE * RATIO_BASE)) * FREQ_RATIO / RATIO_BASE;
	  const int add_score   = hit_value * score_ratio * anthy_settings.anthy_mode.candidate.ucdict_score_ratio;
	  const int score_tmp   = ce->score_tentative + add_score;
	  if (ce->score < score_tmp) {
	   #if defined(DEBUG) && (3 <= DEBUG)
	    anthy_log(1, ((0 <= add_score) ? "TRACE: ucdic();%d-%d:++\n" : "TRACE: ucdic();%d-%d:--\n"), seg_no, k );
	   #endif
	    /* the stored score is never allowed below 1 */
	    if (1 < score_tmp) {
	      ce->score = score_tmp;
	    } else {
	      ce->score = 1;
	    }
	    ce->flag |= CEF_USEDICT;
	  }
	}
      }
    }
  }
  return;
}
# endif
#endif

/*
 * Walk left from corpus element @idx to the nearest element, @idx itself
 * included, that carries ELM_WORD_BORDER.
 *
 * Returns the index of that element, -1 when no such element exists at or
 * before @idx, and 0 when @idx is negative.
 *
 * The element value is re-read on every step.  Previously it was read only
 * once, so the loop never tested an earlier element and simply ran idx down
 * to -1 whenever the starting element was not a border.
 */
static int
find_border_of_this_word(int idx)
{
  if (idx < 0) {
    return 0;
  }
  while (idx > -1) {
    /* element values are stored in network byte order, two ints per element */
    const int val = ntohl(corpus_info.array[idx * 2]);
    if (val & ELM_WORD_BORDER) {
      break;
    }
    idx --;
  }
  return idx;
}

/*
 * Step to the word on the left of corpus element @idx.
 * Returns -1 when @idx is -1 or the element at @idx carries ELM_BOS;
 * otherwise returns find_border_of_this_word(idx - 1).
 */
static int
find_left_word_border(int idx)
{
  if (idx != -1) {
    const int head = ntohl(corpus_info.array[idx * 2]);
    if (!(head & ELM_BOS)) {
      return find_border_of_this_word(idx - 1);
    }
  }
  return -1;
}

/*
 * Scan right from corpus element @idx for the next element carrying
 * ELM_WORD_BORDER and return its index.
 * Returns -1 when @idx is -1, when an element carrying ELM_BOS is met first,
 * or when the scan reaches the bound (array_size - 2) without finding one.
 */
static int
find_right_word_border(int idx)
{
  if (idx == -1) {
    return -1;
  }
  while (idx < corpus_info.array_size - 2) {
    int elm;
    idx += 1;
    elm = ntohl(corpus_info.array[idx * 2]);
    /* ELM_BOS is tested first, so it wins over ELM_WORD_BORDER on the same element */
    if (elm & ELM_BOS) {
      return -1;
    }
    if (elm & ELM_WORD_BORDER) {
      return idx;
    }
  }
  return -1;
}

/*
 * Append @id to @ctx; silently ignored once the set is full.
 * NOTE(review): the bound is MAX_NEIGHBOR - 1, so the last slot of id[]
 * is never written.
 */
static void
push_id(struct neighbor *ctx,
	int id)
{
  if (ctx->nr >= MAX_NEIGHBOR - 1) {
    return;
  }
  ctx->id[ctx->nr++] = id;
}

/*
 * Add the key of corpus element @idx (its value masked with
 * CORPUS_KEY_MASK) to @ctx.
 */
static void
collect_word_context(struct neighbor *ctx, int idx)
{
  push_id(ctx, ntohl(corpus_info.array[idx * 2]) & CORPUS_KEY_MASK);
}

/* ʸǼդξ */
static void
collect_corpus_context(struct neighbor *ctx,
		       struct iterator *it)
{
  int i;
  int this_idx, idx;

  this_idx = find_border_of_this_word(it->idx);

  /*printf(" key=%d\n", it->key);*/
  /* إ */
  idx = this_idx;
  for (i = 0; i < 2; i++) {
    idx = find_left_word_border(idx);
    if (idx == -1) {
      break;
    }
    collect_word_context(ctx, idx);
  }
  /* إ */
  idx = this_idx;
  for (i = 0; i < 2; i++) {
    idx = find_right_word_border(idx);
    if (idx == -1) {
      break;
    }
    collect_word_context(ctx, idx);
  }
}

#if 0	/* Patched by G-HAL, Mon,12 Oct,2009 */
/* Collect into @ctx the core-word ids of the first candidates of the segments
 * around segment @nth (nth-2 .. nth+2, clipped to the list, nth excluded).
 * (Disabled: this region is inside "#if 0".)
 */
static void
collect_user_context(struct neighbor *ctx,
		     struct segment_list *sl, int nth)
{
  int i;
  ctx->nr = 0;
  for (i = nth - 2; i <= nth + 2 && i < sl->nr_segments; i++) {
    int id;
    if ((i < 0) || (i == nth)) {
      continue;
    }
    id = get_indep_word_id(anthy_get_nth_segment(sl, i), 0);
    if (id > -1) {
      /* keep only the key bits before storing */
      id &= CORPUS_KEY_MASK;
      /*printf("user_ctx=%d\n", id);*/
      push_id(ctx, id);
    }
  }
}
#else
/*
 * Collect into @ctx the core-word ids found around segment @nth.
 * Segments nth-2 .. nth+2 (clipped to the list, nth itself excluded) each
 * contribute the id of one candidate: the committed one, else the
 * provisionally committed one, else candidate 0.  Ids are masked with
 * CORPUS_KEY_MASK; segments for which get_indep_word_id() yields -1 add
 * nothing.
 */
static void collect_user_context( struct neighbor* const ctx, struct segment_list* const sl, int nth )
{
  int lo = nth - 2;
  int hi = nth + 2;
  int seg_no;

  if (lo < 0) {
    lo = 0;
  }
  if (!(hi < sl->nr_segments)) {
    hi = sl->nr_segments - 1;
  }

  ctx->nr = 0;
  for (seg_no = lo; seg_no <= hi; ++seg_no) {
    const struct seg_ent* const seg = anthy_get_nth_segment( sl, seg_no );
    if (seg_no != nth) {
      int id;
      int pick = seg->committed;
      if ((pick < 0) || (seg->nr_cands <= pick)) {
	pick = seg->provisional_committed;
      }
      if ((pick < 0) || (seg->nr_cands <= pick)) {
	pick = 0;
      }
      id = get_indep_word_id( seg, pick );
      if (id != -1) {
	push_id( ctx, id & CORPUS_KEY_MASK );
      }
    }
  }
}
#endif

/*
 * Count the (i, j) pairs for which n1->id[i] equals n2->id[j].
 * Duplicated ids are counted once per matching pair.
 */
static int 
do_compare_context(struct neighbor *n1,
		   struct neighbor *n2)
{
  int matches = 0;
  int a;
  for (a = 0; a < n1->nr; a++) {
    const int wanted = n1->id[a];
    int b;
    for (b = 0; b < n2->nr; b++) {
      matches += (n2->id[b] == wanted);
    }
  }
  return matches;
}

/* ʸξӤ */
static int
compare_context(struct neighbor *user,
		struct iterator *it)
{
  struct neighbor sample;
  int nr;
  /**/
  sample.nr = 0;
  /* ʸμվ򽸤 */
  collect_corpus_context(&sample, it);
  if (sample.nr == 0) {
    return 0;
  }
  /* Ӥ */
  nr = do_compare_context(user, &sample);
  if (nr >= sample.nr / 2) {
    return nr;
  }
  return 0;
}

/*
 * Look up the first corpus position recorded for @key.
 *
 * Probes MAX_COLLISION consecutive slots of the bucket table starting at
 * key % bucket_size; each slot is a pair of ints (key, position) stored in
 * network byte order.
 *
 * Returns the recorded position, or -1 when the key is not found.  Also
 * returns -1 when the bucket table is not available (NULL table or a
 * non-positive size), which would otherwise mean a modulo by zero and a
 * NULL dereference.
 */
static int
find_first_pos(int key)
{
  int i;
  if (!corpus_info.bucket || corpus_info.bucket_size <= 0) {
    return -1;
  }
  for (i = 0; i < MAX_COLLISION; i++) {
    const int bkt = (key + i) % corpus_info.bucket_size;
    if ((int)ntohl(corpus_info.bucket[bkt * 2]) == key) {
      return ntohl(corpus_info.bucket[bkt * 2 + 1]);
    }
  }
  return -1;
}

/*
 * Initialise @it at the first occurrence of @key (masked with
 * CORPUS_KEY_MASK) and give it a step budget of @limit.
 * Returns the position stored in it->idx, which is -1 when
 * find_first_pos() does not find the key.
 */
static int
find_first_from_corpus(int key, struct iterator *it, int limit)
{
  const int masked = key & CORPUS_KEY_MASK;
  it->key = masked;
  it->limit = limit;
  it->idx = find_first_pos(masked);
  return it->idx;
}

/*
 * Advance @it to the next occurrence by following the link stored in the
 * second int of the current corpus element.
 * The step budget is decremented first; once it drops below 1 the iterator
 * ends.  A link that is negative, not below array_size, or smaller than the
 * current position also ends the iteration.
 * Returns the new it->idx (-1 when the iteration has ended).
 */
static int
find_next_from_corpus(struct iterator *it)
{
  const int here = it->idx;
  int next = -1;

  if (--it->limit >= 1) {
    next = ntohl(corpus_info.array[here * 2 + 1]);
    if (next < 0 || next >= corpus_info.array_size || next < here) {
      next = -1;
    }
  }
  it->idx = next;
  return next;
}

static void
check_candidate_context(struct seg_ent *cur_seg,
			int i,
			struct neighbor *user)
{
  struct iterator it;
  int nr = 0;
  int word_id;
 #if 0	/* Patched by G-HAL, Mon,12 Oct,2009 */
 #else
  cur_seg->cands[i]->flag &= ~(CEF_CONTEXT);
 #endif
  word_id = get_indep_word_id(cur_seg, i);
  if (word_id == -1) {
    return ;
  }
  /* ƽи򥹥󤹤 */
  find_first_from_corpus(word_id, &it, SEARCH_LIMIT);
  /*printf("word_id=%d %d\n", word_id, it.idx);*/
  while (it.idx > -1) {
    nr += compare_context(user, &it);
    /**/
    find_next_from_corpus(&it);
  }
  /**/
  if (nr > 0) {
    cur_seg->cands[i]->flag |= CEF_CONTEXT;
  }
}

/** Apply the corpus to one segment.
 *@param[in,out]	sl			segment list
 *@param		start_nth		not used by this function
 *@param		nth			index of the segment whose candidates are examined
 *
 *@comment
 *	Every candidate of the segment is checked against the corpus.  If the
 *	top candidate is supported, its CEF_CONTEXT flag is cleared and nothing
 *	else changes.  Otherwise each supported candidate after the first gets
 *	its score raised to score_tentative plus a bonus, when that is higher
 *	than its current score.
 *
 *	Patched by G-HAL
 *		Mon,12 Oct,2009
 */
static void reorder_by_corpus( struct segment_list* const sl, int start_nth, int nth )
{
  struct neighbor user;
  struct seg_ent *cur_seg;
  int k;

  /* ids of the words surrounding this segment */
  collect_user_context(&user, sl, nth);
  if (user.nr == 0) {
    return ;
  }
  cur_seg = anthy_get_nth_segment(sl, nth);
  if (!cur_seg->cands) {
    return;
  }

  /* mark every candidate that the corpus supports */
  for (k = 0; k < cur_seg->nr_cands; k++) {
    check_candidate_context(cur_seg, k, &user);
  }

  /* a supported top candidate needs no help: drop its mark and stop */
  if (cur_seg->cands[0]->flag & CEF_CONTEXT) {
    cur_seg->cands[0]->flag &= ~CEF_CONTEXT;
    return ;
  }

  /* boost the remaining supported candidates */
  for (k = 1; k < cur_seg->nr_cands; k++) {
    struct cand_ent* const ce = cur_seg->cands[k];
    if (!(ce->flag & CEF_CONTEXT)) {
      continue;
    }
    {
      const int score_ratio = (ce->mw ? (ce->mw->struct_score) : (RATIO_BASE * RATIO_BASE)) * FREQ_RATIO / RATIO_BASE;
      const int add_score   = score_ratio * anthy_settings.anthy_mode.candidate.udict_corpus_score;
      const int score_tmp   = ce->score_tentative + add_score;
      if (ce->score < score_tmp) {
	ce->score = score_tmp;
	ce->flag |= CEF_USEDICT;
      }
    }
  }
}

/*
 * Reorder candidates using the corpus.
 *  Segments from index @nth to the end of the list are processed.
 *  Does nothing when the udict_corpus_score setting is 0.
 */
void
anthy_reorder_candidates_by_relation(struct segment_list *sl, int nth)
{
  int seg_no = nth;

  if (0 == anthy_settings.anthy_mode.candidate.udict_corpus_score) {
    return;
  }
  while (seg_no < sl->nr_segments) {
    reorder_by_corpus( sl, nth, seg_no );
    seg_no++;
  }
}

/*
 * Locate the "corpus_array" and "corpus_bucket" sections of the dictionary
 * file and cache their payload pointers and sizes in corpus_info.
 *
 * Each section is read as an int array: int index 1 holds the size in
 * network byte order and the payload starts at int index 16.
 *
 * When either section is missing, the tables are left empty (NULL pointers,
 * zero sizes).  The check used to test corpus_array twice, so a missing
 * bucket section was dereferenced.
 */
void
anthy_relation_init(void)
{
  corpus_info.corpus_array = anthy_file_dic_get_section("corpus_array");
  corpus_info.corpus_bucket = anthy_file_dic_get_section("corpus_bucket");
  if (!corpus_info.corpus_array ||
      !corpus_info.corpus_bucket) {
    /* keep the derived fields consistent even if init is called again */
    corpus_info.array = NULL;
    corpus_info.bucket = NULL;
    corpus_info.array_size = 0;
    corpus_info.bucket_size = 0;
    return ;
  }
  corpus_info.array_size = ntohl(((int *)corpus_info.corpus_array)[1]);
  corpus_info.bucket_size = ntohl(((int *)corpus_info.corpus_bucket)[1]);
  corpus_info.array = &(((int *)corpus_info.corpus_array)[16]);
  corpus_info.bucket = &(((int *)corpus_info.corpus_bucket)[16]);
}
/* vim:ts=8 sw=2 nomodified:
 */
