#include <assert.h>
#include <math.h>
#include "const.h"
#include "permute.h"
#include "callcpp.h"
#include "context.h"
#include "conversion.h"
#include "freelist.h"
#include "globals.h"
#include "ndminx.h"
#include "permdawg.h"
#include "permngram.h"
#include "ratngs.h"
#include "stopper.h"
#include "tordvars.h"
#include "tprintf.h"
#include "trie.h"
#include "varable.h"
#include "unicharset.h"
#include "dict.h"
#include "image.h"
#include "ccutil.h"
| #define GARBAGE_STRING 1.5 |
| #define MAX_DOC_EDGES 250000 |
| #define MAX_NUM_EDGES 2000000 |
| #define MAX_PERM_LENGTH 128 |
| #define MAX_USER_EDGES 50000 |
| #define NON_WERD 1.25 |
| #define SIM_CERTAINTY_OFFSET -10.0 |
| #define SIM_CERTAINTY_SCALE -10.0 |
| #define SIMILARITY_FLOOR 100.0 |
| WERD_CHOICE* get_best_delete_other(WERD_CHOICE *choice1, WERD_CHOICE *choice2) |
| int good_choice(const WERD_CHOICE &choice) |
| double bestrate_pruning_factor = 2.0 |
"Multiplying factor of current best rate to prune other hypotheses"
| bool doc_dict_enable = 1 |
"Enable Document Dictionary "
| int fragments_debug = 0 |
"Debug character fragments"
| char* global_user_words_suffix = "" |
"A list of user-provided words."
| bool ngram_permuter_activated = FALSE |
"Activate character-level n-gram-based permuter"
| bool permute_debug = 0 |
"Debug char permutation process"
| int permute_only_top = 0 |
"Turn on word script consistency permuter"
| bool save_doc_words = 0 |
"Save Document Words"
| bool segment_debug = 0 |
"Debug the whole segmentation process"
| double segment_penalty_dict_nonword = 1.25 |
"Score multiplier for glyph fragment segmentations which do not " "match a dictionary word (lower is better)."
| double segment_penalty_garbage = 1.5 |
"Score multiplier for poorly cased strings that are not in the " "dictionary and generally look like garbage (lower is better)."
| double segment_reward_script = 0.95 |
"Score multipler for script consistency within a word. " "Being a 'reward' factor, it should be <= 1. " "Smaller value implies bigger reward."
"incorporate segmentation cost in word rating?"
1.6.3