#include "mfcpch.h"
#include <ctype.h>
#include <string.h>
#include "tessbox.h"
#include "ratngs.h"
#include "notdll.h"
#include "varable.h"
#include "wordrec.h"
#include "ocrclass.h"
#include "ocrblock.h"
#include "statistc.h"
#include "ocrshell.h"
#include "pageres.h"
#include "elst.h"
#include "memry.h"
#include "control.h"
#include "img.h"
#include "tordmain.h"
#include "mainblk.h"
#include "ocrrow.h"
#include "werd.h"
#include "rect.h"
#include "polyblob.h"
#include "stepblob.h"
#include "tesseractclass.h"
#include "imgs.h"
#include "charsample.h"
#include "charcut.h"
#include "stopper.h"
#include "hosthplb.h"
#include "secname.h"
| #define EXTERN |
| #define WINDOWNAMESIZE 13 |
| CHAR_SAMPLE* clip_sample | ( | PIXROW * | pixrow, | |
| IMAGELINE * | imlines, | |||
| TBOX | pix_box, | |||
| BOOL8 | white_on_black, | |||
| char | c | |||
| ) |
| void display_cluster_prototypes | ( | CHAR_SAMPLES_LIST * | char_clusters | ) |
| void print_em_stats | ( | CHAR_SAMPLES_LIST * | char_clusters, | |
| CHAR_SAMPLE_LIST * | chars_waiting | |||
| ) |
| void reject_all_ems | ( | WERD_RES * | word | ) |
| void reject_all_fullstops | ( | WERD_RES * | word | ) |
| void reject_suspect_fullstops | ( | WERD_RES * | word | ) |
| EXTERN bool tessedit_adapt_to_char_fragments = TRUE |
"Adapt to words that contain a character composed from fragments"
| EXTERN double tessedit_cluster_accept_fraction = 0.80 |
"Largest fraction of characters in cluster for it to be used for adaption"
| EXTERN bool tessedit_cluster_debug = FALSE |
"Generate and print debug information for adaption by clustering"
| EXTERN int tessedit_cluster_min_size = 3 |
"Smallest number of samples in a cluster for it to be used for adaption"
| EXTERN double tessedit_cluster_t1 = 0.20 |
"t1 threshold for clustering samples"
| EXTERN double tessedit_cluster_t2 = 0.40 |
"t2 threshold for clustering samples"
| EXTERN double tessedit_cluster_t3 = 0.12 |
"Extra threshold for clustering samples; only keep a new sample if the best score is greater than this value"
| EXTERN bool tessedit_demo_adaption = FALSE |
"Display cut images and matrix match for demo purposes"
| EXTERN char* tessedit_demo_file = "academe" |
"Name of document containing demo words"
| EXTERN int tessedit_demo_word1 = 62 |
"Word number of first word to display"
| EXTERN int tessedit_demo_word2 = 64 |
"Word number of second word to display"
| EXTERN bool tessedit_matrix_match = TRUE |
"Use matrix matcher"
| EXTERN bool tessedit_mm_adapt_using_prototypes = TRUE |
"Use prototypes when adapting"
| EXTERN bool tessedit_mm_all_rejects = FALSE |
"Adapt to all characters using the matrix matcher"
| EXTERN bool tessedit_mm_only_match_same_char = FALSE |
"Only match samples against clusters for the same character"
| EXTERN bool tessedit_mm_use_non_adaption_set = FALSE |
"Don't try to adapt to characters on this list"
| EXTERN bool tessedit_mm_use_prototypes = TRUE |
"Use prototypes as clusters are built"
| EXTERN bool tessedit_mm_use_rejmap = FALSE |
"Adapt to characters using reject map"
| EXTERN char* tessedit_non_adaption_set = ",.;:'~@*" |
"Characters to be avoided when adapting"
| EXTERN bool tessedit_process_rns = FALSE |
"Handle m - rn ambigs"
| EXTERN bool tessedit_reject_ems = FALSE |
"Reject all m's"
| EXTERN bool tessedit_reject_suspect_ems = FALSE |
"Reject suspect m's"
| EXTERN bool tessedit_test_cluster_input = FALSE |
"Set reject map to enable cluster input to be measured"
| EXTERN bool tessedit_use_best_sample = FALSE |
"Use best sample from cluster when adapting"
1.6.3