/**@file
 *			Patches for Anthy, by G-HAL
 *@brief	Bunsetsu (segment) splitting algorithm: metaword score / priority helpers
 *@date		Wed,05 Nov,2008
 *@date		Sun,16 Nov,2008
 *@date		Tue,18 Nov,2008
 *@date		Thu,20 Nov,2008
 *@date		Mon,05 Jan,2009
 *@date		Sat,17 Jan,2009
 *@date		Mon,19 Jan,2009
 *@date		Sat,24 Jan,2009, Thu,29 Jan,2009
 *@date		Tue,03 Feb,2009
 *@date		Wed,04 Feb,2009 - Thu,05 Feb,2009, Sun,08 Feb,2009
 *@date		Fri,20 Feb,2009, Sat,21 Feb,2009, Tue,24 Feb,2009
 *@date		Fri,06 Mar,2009
 *@date		Sat,25 Apr,2009
 *@date		Fri,01 May,2009
 *@date		Sun,07 Jun,2009
 *@date		Thu,27 Aug,2009
 *@date		Sat,17 Oct,2009
 *@date		Tue,03 Nov,2009 - Wed,04 Nov,2009
 *@date		Fri,06 Nov,2009
 *@date		Sun,08 Nov,2009
 *@date		Thu,12 Nov,2009 - Sun,15 Nov,2009
 *@date		Tue,17 Nov,2009 - Wed,18 Nov,2009
 *@date		Fri,27 Nov,2009
 *@date		Tue,20 Jul,2010 - Wed,21 Jul,2010
 *@date		Mon,16 Aug,2010 - Wed,18 Aug,2010
 *@date		Wed,13 Oct,2010 - Thu,14,Oct,2010
 *@date		Fri,07 Oct,2011
 *@author	Copyright(C)2008-2011 G-HAL
 */
#if defined(HAVE_CONFIG_H)
# include "config.h"
#endif

#if defined(HAVE_STDLIB_H)
# include <stdlib.h>
#endif
#if defined(HAVE_MATH_H)
# include <math.h>
#endif
#if defined(HAVE_ASSERT_H)
# include <assert.h>
#endif

#include "anthy/settings.h"
#include "anthy/logger.h"
#include "anthy/record.h"
#include "anthy/cand_ent_score.h"
#include "src-worddic/dic_ent.h"
#include "src-worddic/dic_main.h"
#include "src-splitter/lattice.h"
#include "src-splitter/metaword_relationscore.h"



/** Compute the evaluation score of a metaword.
 *@param[in]		sc				splitter context; its character table (ce / char_count) and prev_commit are read
 *@param[in,out]	mw				metaword to evaluate, may be NULL; its extra_score cache may be filled in and it is handed to anthy_mw_eval()
 *@param[in]		prev_mw			metaword placed immediately before mw, or NULL when there is none
 *@param[out]		mw_kind			if non-NULL, receives the split kind chosen for mw (SPLITKIND_DEFAULT unless a rule below overrides it)
 *@param[out]		mw_prob			if non-NULL, receives the probability bias of mw (starts at 0.0, limited to at most 1.0)
 *@return							the score, never negative; 0.0 when mw is NULL
 *
 *	Patched by G-HAL
 *		Sun,16 Nov,2008
 *		Thu,20 Nov,2008
 *		Tue,03 Feb,2009
 *		Wed,04 Feb,2009
 *		Sat,21 Feb,2009, Tue,24 Feb,2009
 *		Sat,25 Apr,2009
 *		Tue,03 Nov,2009 - Wed,04 Nov,2009
 *		Fri,06 Nov,2009
 *		Sun,08 Nov,2009
 *		Thu,12 Nov,2009 - Sun,15 Nov,2009
 *		Tue,17 Nov,2009
 *		Fri,27 Nov,2009
 *		Tue,20 Jul,2010 - Wed,21 Jul,2010
 *		Mon,16 Aug,2010 - Wed,18 Aug,2010
 *		Fri,07 Oct,2011
 */
double calc_metaword_score( const struct splitter_context* const sc,
						   struct meta_word* const mw, const struct meta_word* const prev_mw,
						   enum splitkind_of_metaword* const mw_kind, double* const mw_prob )
{
	double	score = 0.0;
	if (mw_kind) {
		*mw_kind = SPLITKIND_DEFAULT;
	}
	if (mw_prob) {
		*mw_prob = 0.0;
	}
	if (NULL == mw) {
		return 0.0;
	}

	/* Base score: the candidate-hint frequency scaled by 3.0 / FREQ_RATIO when
	 * with_cand_score is enabled, otherwise the metaword's own score. */
	score = anthy_settings.anthy_mode.lattice.with_cand_score ? (((double)mw->cand_hint_freq) * 3.0 / FREQ_RATIO) : mw->score;

	/* Compound-word bias: applies to a compound leaf whose recorded left part
	 * matches the metaword that actually precedes it. */
	if ((0.0 != anthy_settings.anthy_mode.lattice.biasscore_by_compound)
		&& (MW_COMPOUND_LEAF == mw->type)
		&& (NULL != mw->mw1_left)
		&& (NULL != prev_mw)
	) {
		int	hit = 0;
		/* Look through a wrapping metaword to the one it wraps. */
		const struct meta_word* const	left_mw = (MW_WRAP == prev_mw->type) ? prev_mw->mw1 : prev_mw;
		const xstr* const				cand_hint = &(mw->mw1_left->cand_hint);
		if (left_mw == mw->mw1_left) {
			/* The very same metaword object. */
			hit = 1;
		} else if ((0 < left_mw->cand_hint.len) && (0 == anthy_xstrcmp(&(left_mw->cand_hint), cand_hint))) {
			/* Same non-empty candidate hint string. */
			hit = 1;
		} else if (NULL == left_mw->wl) {
			hit = 0;
		} else if (PART_CORE <= left_mw->nr_parts) {
			/* NOTE(review): part[PART_CORE] is read when nr_parts == PART_CORE as well; confirm that "<=" (not "<") is intended. */
			const struct part_info* const	left_p_indep          = &(left_mw->wl->part[PART_CORE]);
			const struct seq_ent* const		left_se_indep         = left_p_indep->seq;
			wtype_t left_wt;
			anthy_get_wtype_with_ct( &left_wt, left_p_indep->wt, CT_NONE );
			if ((left_p_indep->len < 1) || (NULL == left_se_indep)) {
				/* The left metaword has no core (independent) word. */
			} else {
				/* Search the core word's dictionary entries for one that passes the
				 * anthy_wtype_include() filter and spells the same string as the hint. */
				int i;
				const struct dic_ent* const* left_dic_ents;
				for (i = (left_se_indep->nr_dic_ents - 1), left_dic_ents = left_se_indep->dic_ents;
					 0 <= i;
					 --i, ++left_dic_ents
				) {
					const struct dic_ent* const	left_de = *left_dic_ents;
					if (!anthy_wtype_include(left_wt, left_de->type)) {
						continue;
					}
					if (0 == anthy_xstrcmp(&(left_de->str), cand_hint)) {
						hit = 1;
						break;
					}
				}
			}
		} else {
			hit = 0;
		}

		if (hit) {
			score += anthy_settings.anthy_mode.lattice.biasscore_by_compound;
			if (mw_kind) {
				*mw_kind = SPLITKIND_COMPOUNDLEAF;
			}
		}
	}

	/* Relation (ucdic) bias: a non-negative relation value adds value * biasscore_by_ucdic,
	 * a negative one discards the score accumulated so far. */
	if (0.0 != anthy_settings.anthy_mode.lattice.biasscore_by_ucdic) {
		const int		hit_value = calc_metaword_relation_score( sc, prev_mw, mw );
		const double	add_score = hit_value * anthy_settings.anthy_mode.lattice.biasscore_by_ucdic;
		if (0 <= hit_value) {
			score += add_score;
		} else {
			score = 0.0;
		}
	}

	/* Bias from learned segment sequences (ochaire). */
	if ((MW_ochaire_min <= mw->type) && (mw->type <= MW_ochaire_max)) {
		/* Weight per split kind.
		 * Columns in the row comments: left neighbour | this metaword | right neighbour. */
		static const double rate[SPLITKIND_SIZE] = {
			[SPLITKIND_OCHAIRE_MIDDLE      ] = 1.00,	/*   MW_OCHAIRE    |  MW_OCHAIRE    |  MW_OCHAIRE     */
			[SPLITKIND_OCHAIREwoD_MIDDLE   ] = 0.95,	/*  MW_OCHAIREwoD  | MW_OCHAIREwoD  | MW_OCHAIREwoD   */
			[SPLITKIND_OCHAIREwoI_MIDDLE   ] = 0.95,	/*  MW_OCHAIREwoI  | MW_OCHAIREwoI  | MW_OCHAIREwoI   */
			[SPLITKIND_OCHAIREwoIwoD_MIDDLE] = 0.85,	/* MW_OCHAIREwoIwoD|MW_OCHAIREwoIwoD|MW_OCHAIREwoIwoD */
			[SPLITKIND_OCHAIRE_HEAD        ] = 0.90,	/*     (none)      |  MW_OCHAIRE    |   MW_OCHAIRE    */
			[SPLITKIND_OCHAIREwoD_HEAD     ] = 0.85,	/*     (none)      | MW_OCHAIREwoD  | MW_OCHAIREwoD   */
			[SPLITKIND_OCHAIRE_TAIL        ] = 1.00,	/*   MW_OCHAIRE    |  MW_OCHAIRE    |     (none)      */
			[SPLITKIND_OCHAIRE_SINGLE      ] = 0.25,	/*     (none)      |  MW_OCHAIRE    |     (none)      */
			[SPLITKIND_OCHAIREwoI_TAIL     ] = 0.35,	/*  MW_OCHAIREwoI  | MW_OCHAIREwoI  |     (none)      */
			[SPLITKIND_OCHAIREwoD_TAIL     ] = 0.30,	/*  MW_OCHAIREwoD  | MW_OCHAIREwoD  |     (none)      */
			[SPLITKIND_OCHAIREwoIwoD_TAIL  ] = 0.10,	/* MW_OCHAIREwoIwoD|MW_OCHAIREwoIwoD|     (none)      */
			[SPLITKIND_OCHAIREwoD_SINGLE   ] = 0.05,	/*     (none)      | MW_OCHAIREwoD  |     (none)      */
			[SPLITKIND_CANDHISTORY         ] = 0.00,
			[SPLITKIND_DEFAULT             ] = 0.00,
			[SPLITKIND_COMPOUNDHEAD        ] = 0.00,
			[SPLITKIND_COMPOUNDPART        ] = 0.00,
			[SPLITKIND_COMPOUND            ] = 0.00,
			[SPLITKIND_COMPOUNDLEAF        ] = 0.00,
			[SPLITKIND_OCHAIREwoI_HEAD     ] = 0.00,	/*     (none)      | MW_OCHAIREwoI  | MW_OCHAIREwoI   */
			[SPLITKIND_OCHAIREwoIwoD_HEAD  ] = 0.00,	/*     (none)      |MW_OCHAIREwoIwoD|MW_OCHAIREwoIwoD */
			[SPLITKIND_OCHAIREwoI_SINGLE   ] = 0.00,	/*     (none)      | MW_OCHAIREwoI  |     (none)      */
			[SPLITKIND_OCHAIREwoIwoD_SINGLE] = 0.00,	/*     (none)      |MW_OCHAIREwoIwoD|     (none)      */
			[SPLITKIND_DELETEDHISTORY      ] = 0.00,
		};
		const enum splitkind_of_metaword kind = get_metaword_splitkind( mw );
		score += (anthy_settings.anthy_mode.lattice.biasscore_by_ochaire * rate[kind]);
		if (mw_kind) {
			*mw_kind = kind;
		}
		if (mw_prob) {
			const double base_prob = anthy_settings.anthy_mode.lattice.biasprob_by_ochaire * rate[kind];
			/* log10() of a zero or negative count is -inf / NaN and would poison *mw_prob;
			 * a metaword without a positive learned frequency gets no frequency gain. */
			const double freq_log  = (0 < mw->cand_hint_learned_freq) ? log10( (double)mw->cand_hint_learned_freq ) : 0.0;
			const double add_prob  = (base_prob * 0.8) * (1.0 + freq_log / 5.2 );	/* reaches base_prob at a learned frequency of about 20 */
			*mw_prob += add_prob;
		}
	}

	/* Bias from the learned frequency of the candidate itself. */
	if ((0.0 != anthy_settings.anthy_mode.lattice.biasscore_by_learnedfreq) && (0 < mw->cand_hint_learned_freq)) {
		score += (log10( (double)mw->cand_hint_learned_freq ) * anthy_settings.anthy_mode.lattice.biasscore_by_learnedfreq);
	}

	/* Bias from the DEP_HISTORY record section, keyed by the trailing
	 * cand_hint_length_of_dep characters of the metaword. */
	if (0.0 != anthy_settings.anthy_mode.lattice.biasscore_by_dephistory) {
		if (-1 == mw->extra_score) {
			/* -1 marks "not looked up yet": compute once and cache the result in mw. */
			mw->extra_score = 0;
			do {
				xstr	xs;
				const int	dep_start = mw->from + mw->len - mw->cand_hint_length_of_dep;
				/* Refuse offsets on either side of the character table. */
				if ((dep_start < 0) || (sc->char_count < dep_start)) {
					anthy_log( 1, "***BUG*** calc_metaword_score() max:%d, from:%d, len:%d, dep-len:%d\n",
							  sc->char_count, mw->from, mw->len, mw->cand_hint_length_of_dep );
					break;
				}
				/* A non-zero result from the record selectors is treated as "not available". */
				if (anthy_select_section(DEP_HISTORY,0)) {
					break;
				}
				xs.str = sc->ce[dep_start].c;
				xs.len = mw->cand_hint_length_of_dep;
				if ((0 < xs.len) && !anthy_select_row(&xs,0)) {
					const int	count = anthy_get_row_frequency();
					if (0 < count) {
						mw->extra_score = (int)( anthy_settings.anthy_mode.lattice.biasscore_by_dephistory * log10( 1.0 + count ) );
					}
				}
			} while(0);
		}
		score += mw->extra_score;
	}

	/* Scaling by the segment-structure score. */
	if (anthy_settings.anthy_mode.lattice.with_candstruct_score) {
		/* anthy_mw_eval() is presumably what refreshes mw->struct_score - TODO confirm. */
		if (prev_mw) {
			const struct seg_dep_info_t prev_commit = {
				.seg_class     = prev_mw->seg_class,
				.dep_class     = prev_mw->dep_class,
				.dep_word_hash = prev_mw->dep_word_hash,
			};
			anthy_mw_eval( &prev_commit, mw, anthy_settings.anthy_mode.lattice.with_cand_corpus );
		} else {
			anthy_mw_eval( &(sc->prev_commit), mw, anthy_settings.anthy_mode.lattice.with_cand_corpus );
		}
		score = score * mw->struct_score / RATIO_BASE;

		/* Adjust for the depth of the dependent-word chain: a fixed amount once the
		 * threshold is reached, plus an amount proportional to the depth.
		 * The setting names suggest both amounts lower the score - TODO confirm their sign. */
		if (anthy_settings.anthy_mode.depgraph.score.decrease_biasscore_threshold <= mw->cand_hint_depth_of_dep) {
			score += anthy_settings.anthy_mode.depgraph.score.decrease_biasscore;
		}
		score += (mw->cand_hint_depth_of_dep * anthy_settings.anthy_mode.depgraph.score.decrease_score);
	}

	/* Keep the results inside their valid ranges. */
	if (score < 0.0) {
		score = 0.0;
	}
	if (mw_prob && (1.0 < (*mw_prob))) {
		*mw_prob = 1.0;
	}
	return score;
}



/** metaword ͥټ̤
 *@param[in]		mw					ͥ٤metaword
 *@return								ͥټ
 *
 *	Patched by G-HAL
 *		Wed,05 Nov,2008
 *		Sun,16 Nov,2008
 *		Tue,18 Nov,2008
 *		Thu,20 Nov,2008
 *		Mon,05 Jan,2009
 *		Sat,17 Jan,2009
 *		Mon,19 Jan,2009
 *		Sat,24 Jan,2009, Thu,29 Jan,2009
 *		Sun,08 Feb,2009
 *		Tue,24 Feb,2009
 *		Fri,01 May,2009
 *		Thu,27 Aug,2009
 *		Sun,15 Nov,2009
 *		Tue,17 Nov,2009
 */
enum splitkind_of_metaword get_metaword_splitkind( const struct meta_word* const mw )
{
	switch (mw->type) {
	case MW_OCHAIRE:
		if (mw->score <= 0) {
			return SPLITKIND_DELETEDHISTORY;
		}
		if (NULL == mw->mw1_left) {
			if (NULL == mw->mw1_right) {
				return SPLITKIND_OCHAIRE_SINGLE;
			}
			return SPLITKIND_OCHAIRE_HEAD;
		}
		if (NULL == mw->mw1_right) {
			return SPLITKIND_OCHAIRE_TAIL;
		}
		return SPLITKIND_OCHAIRE_MIDDLE;
		break;
	case MW_OCHAIREwithoutINDEP:
		if (mw->score <= 0) {
			return SPLITKIND_DELETEDHISTORY;
		}
		if (NULL == mw->mw1_left) {
			if (NULL == mw->mw1_right) {
				return SPLITKIND_OCHAIREwoI_SINGLE;
			}
			return SPLITKIND_OCHAIREwoI_HEAD;
		}
		if (NULL == mw->mw1_right) {
			return SPLITKIND_OCHAIREwoI_TAIL;
		}
		return SPLITKIND_OCHAIREwoI_MIDDLE;
		break;
	case MW_OCHAIREwithoutDEP:
		if (mw->score <= 0) {
			return SPLITKIND_DELETEDHISTORY;
		}
		if (NULL == mw->mw1_left) {
			if (NULL == mw->mw1_right) {
				return SPLITKIND_OCHAIREwoD_SINGLE;
			}
			return SPLITKIND_OCHAIREwoD_HEAD;
		}
		if (NULL == mw->mw1_right) {
			return SPLITKIND_OCHAIREwoD_TAIL;
		}
		return SPLITKIND_OCHAIREwoD_MIDDLE;
		break;
	case MW_OCHAIREwithoutINDEPwithoutDEP:
		if (mw->score <= 0) {
			return SPLITKIND_DELETEDHISTORY;
		}
		if (NULL == mw->mw1_left) {
			if (NULL == mw->mw1_right) {
				return SPLITKIND_OCHAIREwoIwoD_SINGLE;
			}
			return SPLITKIND_OCHAIREwoIwoD_HEAD;
		}
		if (NULL == mw->mw1_right) {
			return SPLITKIND_OCHAIREwoIwoD_TAIL;
		}
		return SPLITKIND_OCHAIREwoIwoD_MIDDLE;
		break;
	case MW_CANDHISTORY:
		if (mw->score <= 0) {
			return SPLITKIND_DELETEDHISTORY;
		}
		return SPLITKIND_CANDHISTORY;
		break;
	case MW_COMPOUND_HEAD:
		return SPLITKIND_COMPOUNDHEAD;
		break;
	case MW_COMPOUND_PART:
		return SPLITKIND_COMPOUNDPART;
		break;
	case MW_COMPOUND:
		return SPLITKIND_COMPOUND;
		break;
	case MW_COMPOUND_LEAF:
		return SPLITKIND_COMPOUNDLEAF;
		break;
	default:
		return SPLITKIND_DEFAULT;
		break;
	}
	assert(0);
	return SPLITKIND_DELETEDHISTORY;
}



/** metaword ͥ٤׻
 *@param[in]		pri					Ѥͥٽɽ
 *@param[in]		mw					ͥ٤metaword
 *@param			cmp_priority		оݤȤʤ priority
 *@param			cmp_minlen			оݤȤʤʸ
 *@param[out]		have_sub_priority	sub-priority Ĥݤ
 *@param[out]		ret_minlen			Ӵʸоݤ cmp_minlen ȤϤ͡
 *@return								ͥ١礭ۤɹ⤤
 *
 *	(lhs|rhs)->mw->type ˤ MW_OCHAIRE, MW_CANDHISTORY ʳʪ⤢Τա
 *
 *	Patched by G-HAL
 *		Wed,05 Nov,2008
 *		Sun,16 Nov,2008
 *		Tue,18 Nov,2008
 *		Thu,20 Nov,2008
 *		Mon,05 Jan,2009
 *		Sat,17 Jan,2009
 *		Mon,19 Jan,2009
 *		Sat,24 Jan,2009, Thu,29 Jan,2009
 *		Sun,08 Feb,2009
 *		Tue,24 Feb,2009
 *		Fri,01 May,2009
 *		Thu,27 Aug,2009
 *		Sun,15 Nov,2009
 *		Tue,17 Nov,2009
 */
int calc_cmpnode_priority( const struct priority_of_metaword_t* const pri,
						  struct meta_word* const mw,
						  int cmp_priority,
						  int cmp_minlen,
						  int* const have_sub_priority,
						  int* const ret_minlen )
{
	const enum splitkind_of_metaword kind = get_metaword_splitkind( mw );
	*ret_minlen = 0;
	switch (kind) {
	case SPLITKIND_OCHAIRE_MIDDLE:
	case SPLITKIND_OCHAIREwoD_MIDDLE:
	case SPLITKIND_OCHAIREwoI_MIDDLE:
	case SPLITKIND_OCHAIREwoIwoD_MIDDLE:
	case SPLITKIND_OCHAIRE_HEAD:
	case SPLITKIND_OCHAIREwoD_HEAD:
		if (anthy_settings.anthy_mode.lattice.with_ochaire_strong) {
			*ret_minlen = mw->ochaire_total_len;
			*have_sub_priority = pri->splitkind_pri[kind].sub;
			return pri->splitkind_pri[kind].pri;
		} else {
			*ret_minlen = 0;
			*have_sub_priority = pri->OCHAIRE_WEAKMODE.sub;
			return pri->OCHAIRE_WEAKMODE.pri;
		}
		break;
	case SPLITKIND_OCHAIRE_TAIL:
	case SPLITKIND_OCHAIRE_SINGLE:
	case SPLITKIND_OCHAIREwoI_TAIL:
	case SPLITKIND_OCHAIREwoD_TAIL:
	case SPLITKIND_OCHAIREwoIwoD_TAIL:
	case SPLITKIND_OCHAIREwoD_SINGLE:
		if (anthy_settings.anthy_mode.lattice.with_ochaire_strong) {
			*ret_minlen = mw->len;
			*have_sub_priority = pri->splitkind_pri[kind].sub;
			return pri->splitkind_pri[kind].pri;
		} else {
			*ret_minlen = 0;
			*have_sub_priority = pri->OCHAIRE_WEAKMODE.sub;
			return pri->OCHAIRE_WEAKMODE.pri;
		}
		break;
	case SPLITKIND_OCHAIREwoI_HEAD:
	case SPLITKIND_OCHAIREwoIwoD_HEAD:
		*ret_minlen = mw->ochaire_total_len;
		*have_sub_priority = pri->splitkind_pri[kind].sub;
		return pri->splitkind_pri[kind].pri;
		break;
	case SPLITKIND_OCHAIREwoI_SINGLE:
	case SPLITKIND_OCHAIREwoIwoD_SINGLE:
		*ret_minlen = mw->len;
		*have_sub_priority = pri->splitkind_pri[kind].sub;
		return pri->splitkind_pri[kind].pri;
		break;
	case SPLITKIND_CANDHISTORY:
		{
			if ((pri->CAND_COUNTER_MINLIMIT.pri <= cmp_priority) && (cmp_minlen <= mw->len)) {
				if (pri->CAND_COUNTER_MAXLIMIT.pri < cmp_priority) {
					cmp_priority = pri->CAND_COUNTER_MAXLIMIT_VALUE.pri;
				}
				if (mw->ochaire_opposite < cmp_priority) {
					mw->ochaire_opposite = cmp_priority;
				}
			}
			if (0 < mw->ochaire_opposite) {
				*ret_minlen = mw->len;
				*have_sub_priority = pri->CAND_COUNTER_MINLIMIT.sub;
				return mw->ochaire_opposite;
			}
			if (anthy_settings.anthy_mode.lattice.with_candhistory_strong) {
				*ret_minlen = mw->len;
				*have_sub_priority = pri->CAND_STRONGMODE.sub;
				return pri->CAND_STRONGMODE.pri;
			}
			*have_sub_priority = pri->CAND_WEAKMODE.sub;
			return pri->CAND_WEAKMODE.pri;
		}
		break;
	case SPLITKIND_DEFAULT:
	case SPLITKIND_COMPOUNDHEAD:
	case SPLITKIND_COMPOUNDPART:
	case SPLITKIND_COMPOUND:
	case SPLITKIND_COMPOUNDLEAF:
		*have_sub_priority = pri->splitkind_pri[kind].sub;
		return pri->splitkind_pri[kind].pri;
		break;
	case SPLITKIND_DELETEDHISTORY:
		*ret_minlen = 0;
		*have_sub_priority = pri->splitkind_pri[kind].sub;
		return pri->splitkind_pri[kind].pri;
		break;
	default:
		assert(0);
		*ret_minlen = 0;
		*have_sub_priority = pri->splitkind_pri[SPLITKIND_DELETEDHISTORY].sub;
		return pri->splitkind_pri[SPLITKIND_DELETEDHISTORY].pri;
		break;
	}
	assert(0);
	*have_sub_priority = pri->splitkind_pri[SPLITKIND_DELETEDHISTORY].sub;
	return pri->splitkind_pri[SPLITKIND_DELETEDHISTORY].pri;
}

/* [ End of File ] */
/* vim:ts=4 sw=4 nomodified:
 */
