tesseract  3.05.02
tesseract::Classify Class Reference

#include <classify.h>

Inheritance diagram for tesseract::Classify:
tesseract::CCStruct tesseract::CUtil tesseract::CCUtil tesseract::Wordrec tesseract::Tesseract

Public Member Functions

 Classify ()
 
virtual ~Classify ()
 
Dict & getDict ()
 
const ShapeTable * shape_table () const
 
void SetStaticClassifier (ShapeClassifier *static_classifier)
 
void AddLargeSpeckleTo (int blob_length, BLOB_CHOICE_LIST *choices)
 
bool LargeSpeckle (const TBLOB &blob)
 
ADAPT_TEMPLATES NewAdaptedTemplates (bool InitFromUnicharset)
 
int GetFontinfoId (ADAPT_CLASS Class, uinT8 ConfigId)
 
int PruneClasses (const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
 
void ReadNewCutoffs (FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
 
void PrintAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
void WriteAdaptedTemplates (FILE *File, ADAPT_TEMPLATES Templates)
 
ADAPT_TEMPLATES ReadAdaptedTemplates (FILE *File)
 
FLOAT32 ComputeNormMatch (CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
 
void FreeNormProtos ()
 
NORM_PROTOS * ReadNormProtos (FILE *File, inT64 end_offset)
 
void ConvertProto (PROTO Proto, int ProtoId, INT_CLASS Class)
 
INT_TEMPLATES CreateIntTemplates (CLASSES FloatProtos, const UNICHARSET &target_unicharset)
 
void LearnWord (const char *fontname, WERD_RES *word)
 
void LearnPieces (const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
 
void InitAdaptiveClassifier (bool load_pre_trained_templates)
 
void InitAdaptedClass (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
 
void AmbigClassifier (const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
 
void MasterMatcher (INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
 
void ExpandShapesAndApplyCorrections (ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
 
double ComputeCorrectedRating (bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
 
void ConvertMatchesToChoices (const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
 
void AddNewResult (const UnicharRating &new_result, ADAPT_RESULTS *results)
 
int GetAdaptiveFeatures (TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
 
void DebugAdaptiveClassifier (TBLOB *Blob, ADAPT_RESULTS *Results)
 
PROTO_ID MakeNewTempProtos (FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
 
int MakeNewTemporaryConfig (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
 
void MakePermanent (ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
 
void PrintAdaptiveMatchResults (const ADAPT_RESULTS &results)
 
void RemoveExtraPuncs (ADAPT_RESULTS *Results)
 
void RemoveBadMatches (ADAPT_RESULTS *Results)
 
void SetAdaptiveThreshold (FLOAT32 Threshold)
 
void ShowBestMatchFor (int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
 
STRING ClassIDToDebugStr (const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const
 
int ClassAndConfigIDToFontOrShapeID (int class_id, int int_result_config) const
 
int ShapeIDToClassID (int shape_id) const
 
UNICHAR_ID * BaselineClassifier (TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
 
int CharNormClassifier (TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
 
int CharNormTrainingSample (bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
 
UNICHAR_ID * GetAmbiguities (TBLOB *Blob, CLASS_ID CorrectClass)
 
void DoAdaptiveMatch (TBLOB *Blob, ADAPT_RESULTS *Results)
 
void AdaptToChar (TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
 
void DisplayAdaptedChar (TBLOB *blob, INT_CLASS_STRUCT *int_class)
 
bool AdaptableWord (WERD_RES *word)
 
void EndAdaptiveClassifier ()
 
void SettupPass1 ()
 
void SettupPass2 ()
 
void AdaptiveClassifier (TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
 
void ClassifyAsNoise (ADAPT_RESULTS *Results)
 
void ResetAdaptiveClassifierInternal ()
 
void SwitchAdaptiveClassifier ()
 
void StartBackupAdaptiveClassifier ()
 
int GetCharNormFeature (const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
 
void ComputeCharNormArrays (FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
 
bool TempConfigReliable (CLASS_ID class_id, const TEMP_CONFIG &config)
 
void UpdateAmbigsGroup (CLASS_ID class_id, TBLOB *Blob)
 
bool AdaptiveClassifierIsFull () const
 
bool AdaptiveClassifierIsEmpty () const
 
bool LooksLikeGarbage (TBLOB *blob)
 
void RefreshDebugWindow (ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
 
void ClearCharNormArray (uinT8 *char_norm_array)
 
void ComputeIntCharNormArray (const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
 
void ComputeIntFeatures (FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
 
INT_TEMPLATES ReadIntTemplates (FILE *File)
 
void WriteIntTemplates (FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
 
CLASS_ID GetClassToDebug (const char *Prompt, bool *adaptive_on, bool *pretrained_on, int *shape_id)
 
void ShowMatchDisplay ()
 
UnicityTable< FontInfo > & get_fontinfo_table ()
 
const UnicityTable< FontInfo > & get_fontinfo_table () const
 
UnicityTable< FontSet > & get_fontset_table ()
 
void NormalizeOutlines (LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
 
FEATURE_SET ExtractOutlineFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractPicoFeatures (TBLOB *Blob)
 
FEATURE_SET ExtractIntCNFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
FEATURE_SET ExtractIntGeoFeatures (const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
 
void LearnBlob (const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
 
bool WriteTRFile (const STRING &filename)
 
- Public Member Functions inherited from tesseract::CCStruct
 CCStruct ()
 
 ~CCStruct ()
 
- Public Member Functions inherited from tesseract::CUtil
 CUtil ()
 
 ~CUtil ()
 
void read_variables (const char *filename, bool global_only)
 
- Public Member Functions inherited from tesseract::CCUtil
 CCUtil ()
 
virtual ~CCUtil ()
 
void main_setup (const char *argv0, const char *basename)
 CCUtil::main_setup - set location of tessdata and name of image. More...
 
ParamsVectors * params ()
 

Static Public Member Functions

static void SetupBLCNDenorms (const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
 
static void ExtractFeatures (const TBLOB &blob, bool nonlinear_norm, GenericVector< INT_FEATURE_STRUCT > *bl_features, GenericVector< INT_FEATURE_STRUCT > *cn_features, INT_FX_RESULT_STRUCT *results, GenericVector< int > *outline_cn_counts)
 

Public Attributes

bool allow_blob_division = true
 
bool prioritize_division = FALSE
 
int tessedit_single_match = FALSE
 
bool classify_enable_learning = true
 
int classify_debug_level = 0
 
int classify_norm_method = character
 
double classify_char_norm_range = 0.2
 
double classify_min_norm_scale_x = 0.0
 
double classify_max_norm_scale_x = 0.325
 
double classify_min_norm_scale_y = 0.0
 
double classify_max_norm_scale_y = 0.325
 
double classify_max_rating_ratio = 1.5
 
double classify_max_certainty_margin = 5.5
 
bool tess_cn_matching = 0
 
bool tess_bn_matching = 0
 
bool classify_enable_adaptive_matcher = 1
 
bool classify_use_pre_adapted_templates = 0
 
bool classify_save_adapted_templates = 0
 
bool classify_enable_adaptive_debugger = 0
 
bool classify_nonlinear_norm = 0
 
int matcher_debug_level = 0
 
int matcher_debug_flags = 0
 
int classify_learning_debug_level = 0
 
double matcher_good_threshold = 0.125
 
double matcher_reliable_adaptive_result = 0.0
 
double matcher_perfect_threshold = 0.02
 
double matcher_bad_match_pad = 0.15
 
double matcher_rating_margin = 0.1
 
double matcher_avg_noise_size = 12.0
 
int matcher_permanent_classes_min = 1
 
int matcher_min_examples_for_prototyping = 3
 
int matcher_sufficient_examples_for_prototyping = 5
 
double matcher_clustering_max_angle_delta = 0.015
 
double classify_misfit_junk_penalty = 0.0
 
double rating_scale = 1.5
 
double certainty_scale = 20.0
 
double tessedit_class_miss_scale = 0.00390625
 
double classify_adapted_pruning_factor = 2.5
 
double classify_adapted_pruning_threshold = -1.0
 
int classify_adapt_proto_threshold = 230
 
int classify_adapt_feature_threshold = 230
 
bool disable_character_fragments = TRUE
 
double classify_character_fragments_garbage_certainty_threshold = -3.0
 
bool classify_debug_character_fragments = FALSE
 
bool matcher_debug_separate_windows = FALSE
 
char * classify_learn_debug_str = ""
 
int classify_class_pruner_threshold = 229
 
int classify_class_pruner_multiplier = 15
 
int classify_cp_cutoff_strength = 7
 
int classify_integer_matcher_multiplier = 10
 
INT_TEMPLATES PreTrainedTemplates
 
ADAPT_TEMPLATES AdaptedTemplates
 
ADAPT_TEMPLATES BackupAdaptedTemplates
 
BIT_VECTOR AllProtosOn
 
BIT_VECTOR AllConfigsOn
 
BIT_VECTOR AllConfigsOff
 
BIT_VECTOR TempProtoMask
 
bool EnableLearning
 
NORM_PROTOS * NormProtos
 
UnicityTable< FontInfo > fontinfo_table_
 
UnicityTable< FontSet > fontset_table_
 
int il1_adaption_test = 0
 
bool classify_bln_numeric_mode = 0
 
double speckle_large_max_size = 0.30
 
double speckle_rating_penalty = 10.0
 
- Public Attributes inherited from tesseract::CCUtil
STRING datadir
 
STRING imagebasename
 
STRING lang
 
STRING language_data_path_prefix
 
TessdataManager tessdata_manager
 
UNICHARSET unicharset
 
UnicharAmbigs unichar_ambigs
 
STRING imagefile
 
STRING directory
 
char * m_data_sub_dir = "tessdata/"
 
int ambigs_debug_level = 0
 
bool use_definite_ambigs_for_classifier = 0
 
bool use_ambigs_for_adaption = 0
 

Protected Attributes

IntegerMatcher im_
 
FEATURE_DEFS_STRUCT feature_defs_
 
ShapeTable * shape_table_
 

Additional Inherited Members

- Static Public Attributes inherited from tesseract::CCStruct
static const double kDescenderFraction = 0.25
 
static const double kXHeightFraction = 0.5
 
static const double kAscenderFraction = 0.25
 
static const double kXHeightCapRatio
 

Detailed Description

Definition at line 61 of file classify.h.

Constructor & Destructor Documentation

◆ Classify()

tesseract::Classify::Classify ( )

Definition at line 35 of file classify.cpp.

36  : BOOL_MEMBER(allow_blob_division, true, "Use divisible blobs chopping",
37  this->params()),
39  "Prioritize blob division over chopping", this->params()),
40  INT_MEMBER(tessedit_single_match, FALSE, "Top choice only from CP",
41  this->params()),
42  BOOL_MEMBER(classify_enable_learning, true, "Enable adaptive classifier",
43  this->params()),
44  INT_MEMBER(classify_debug_level, 0, "Classify debug level",
45  this->params()),
46  INT_MEMBER(classify_norm_method, character, "Normalization Method ...",
47  this->params()),
49  "Character Normalization Range ...", this->params()),
50  double_MEMBER(classify_min_norm_scale_x, 0.0, "Min char x-norm scale ...",
51  this->params()), /* PREV DEFAULT 0.1 */
53  "Max char x-norm scale ...",
54  this->params()), /* PREV DEFAULT 0.3 */
55  double_MEMBER(classify_min_norm_scale_y, 0.0, "Min char y-norm scale ...",
56  this->params()), /* PREV DEFAULT 0.1 */
58  "Max char y-norm scale ...",
59  this->params()), /* PREV DEFAULT 0.3 */
61  "Veto ratio between classifier ratings", this->params()),
63  "Veto difference between classifier certainties",
64  this->params()),
65  BOOL_MEMBER(tess_cn_matching, 0, "Character Normalized Matching",
66  this->params()),
67  BOOL_MEMBER(tess_bn_matching, 0, "Baseline Normalized Matching",
68  this->params()),
70  "Enable adaptive classifier", this->params()),
72  "Use pre-adapted classifier templates", this->params()),
74  "Save adapted templates to a file", this->params()),
75  BOOL_MEMBER(classify_enable_adaptive_debugger, 0, "Enable match debugger",
76  this->params()),
78  "Non-linear stroke-density normalization", this->params()),
79  INT_MEMBER(matcher_debug_level, 0, "Matcher Debug Level", this->params()),
80  INT_MEMBER(matcher_debug_flags, 0, "Matcher Debug Flags", this->params()),
81  INT_MEMBER(classify_learning_debug_level, 0, "Learning Debug Level: ",
82  this->params()),
83  double_MEMBER(matcher_good_threshold, 0.125, "Good Match (0-1)",
84  this->params()),
85  double_MEMBER(matcher_reliable_adaptive_result, 0.0, "Great Match (0-1)",
86  this->params()),
87  double_MEMBER(matcher_perfect_threshold, 0.02, "Perfect Match (0-1)",
88  this->params()),
89  double_MEMBER(matcher_bad_match_pad, 0.15, "Bad Match Pad (0-1)",
90  this->params()),
91  double_MEMBER(matcher_rating_margin, 0.1, "New template margin (0-1)",
92  this->params()),
93  double_MEMBER(matcher_avg_noise_size, 12.0, "Avg. noise blob length",
94  this->params()),
95  INT_MEMBER(matcher_permanent_classes_min, 1, "Min # of permanent classes",
96  this->params()),
98  "Reliable Config Threshold", this->params()),
100  "Enable adaption even if the ambiguities have not been seen",
101  this->params()),
103  "Maximum angle delta for prototype clustering",
104  this->params()),
106  "Penalty to apply when a non-alnum is vertically out of "
107  "its expected textline position",
108  this->params()),
109  double_MEMBER(rating_scale, 1.5, "Rating scaling factor", this->params()),
110  double_MEMBER(certainty_scale, 20.0, "Certainty scaling factor",
111  this->params()),
113  "Scale factor for features not used", this->params()),
116  "Prune poor adapted results this much worse than best result",
117  this->params()),
119  "Threshold at which classify_adapted_pruning_factor starts",
120  this->params()),
122  "Threshold for good protos during adaptive 0-255",
123  this->params()),
125  "Threshold for good features during adaptive 0-255",
126  this->params()),
128  "Do not include character fragments in the"
129  " results of the classifier",
130  this->params()),
132  -3.0,
133  "Exclude fragments that do not look like whole"
134  " characters from training and adaption",
135  this->params()),
137  "Bring up graphical debugging windows for fragments training",
138  this->params()),
140  "Use two different windows for debugging the matching: "
141  "One for the protos and one for the features.",
142  this->params()),
143  STRING_MEMBER(classify_learn_debug_str, "", "Class str to debug learning",
144  this->params()),
146  "Class Pruner Threshold 0-255", this->params()),
148  "Class Pruner Multiplier 0-255: ", this->params()),
150  "Class Pruner CutoffStrength: ", this->params()),
152  "Integer Matcher Multiplier 0-255: ", this->params()),
153  EnableLearning(true),
155  "Don't adapt to i/I at beginning of word", this->params()),
157  "Assume the input is numbers [0-9].", this->params()),
158  double_MEMBER(speckle_large_max_size, 0.30, "Max large speckle size",
159  this->params()),
161  "Penalty to add to worst rating for noise", this->params()),
162  shape_table_(NULL),
163  dict_(this),
164  static_classifier_(NULL) {
165  fontinfo_table_.set_compare_callback(
167  fontinfo_table_.set_clear_callback(
169  fontset_table_.set_compare_callback(
171  fontset_table_.set_clear_callback(
173  AdaptedTemplates = NULL;
174  BackupAdaptedTemplates = NULL;
175  PreTrainedTemplates = NULL;
176  AllProtosOn = NULL;
177  AllConfigsOn = NULL;
178  AllConfigsOff = NULL;
179  TempProtoMask = NULL;
180  NormProtos = NULL;
181 
182  NumAdaptationsFailed = 0;
183 
184  learn_debug_win_ = NULL;
185  learn_fragmented_word_debug_win_ = NULL;
186  learn_fragments_debug_win_ = NULL;
187 
188  CharNormCutoffs = new uinT16[MAX_NUM_CLASSES];
189  BaselineCutoffs = new uinT16[MAX_NUM_CLASSES];
190 }
double matcher_rating_margin
Definition: classify.h:424
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
bool classify_enable_adaptive_debugger
Definition: classify.h:414
double matcher_good_threshold
Definition: classify.h:420
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
double matcher_clustering_max_angle_delta
Definition: classify.h:432
#define TRUE
Definition: capi.h:45
double classify_min_norm_scale_x
Definition: classify.h:397
bool classify_enable_learning
Definition: classify.h:389
bool classify_enable_adaptive_matcher
Definition: classify.h:409
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
void FontInfoDeleteCallback(FontInfo f)
Definition: fontinfo.cpp:139
void FontSetDeleteCallback(FontSet fs)
Definition: fontinfo.cpp:146
double matcher_bad_match_pad
Definition: classify.h:423
#define STRING_MEMBER(name, val, comment, vec)
Definition: params.h:307
BIT_VECTOR TempProtoMask
Definition: classify.h:483
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
char * classify_learn_debug_str
Definition: classify.h:459
double tessedit_class_miss_scale
Definition: classify.h:439
bool prioritize_division
Definition: classify.h:387
int classify_adapt_feature_threshold
Definition: classify.h:447
int matcher_permanent_classes_min
Definition: classify.h:426
bool classify_use_pre_adapted_templates
Definition: classify.h:411
double matcher_perfect_threshold
Definition: classify.h:422
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
double classify_min_norm_scale_y
Definition: classify.h:399
bool classify_debug_character_fragments
Definition: classify.h:455
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
bool CompareFontSet(const FontSet &fs1, const FontSet &fs2)
Definition: fontinfo.cpp:128
unsigned short uinT16
Definition: host.h:34
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:304
#define FALSE
Definition: capi.h:46
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:301
bool matcher_debug_separate_windows
Definition: classify.h:458
double classify_adapted_pruning_factor
Definition: classify.h:441
BIT_VECTOR AllProtosOn
Definition: classify.h:480
double classify_misfit_junk_penalty
Definition: classify.h:435
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:310
bool classify_nonlinear_norm
Definition: classify.h:416
double classify_max_norm_scale_y
Definition: classify.h:400
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_proto_threshold
Definition: classify.h:445
int matcher_min_examples_for_prototyping
Definition: classify.h:428
ParamsVectors * params()
Definition: ccutil.h:63
int classify_learning_debug_level
Definition: classify.h:419
ShapeTable * shape_table_
Definition: classify.h:512
double classify_max_certainty_margin
Definition: classify.h:404
bool disable_character_fragments
Definition: classify.h:450
double classify_adapted_pruning_threshold
Definition: classify.h:443
int classify_integer_matcher_multiplier
Definition: classify.h:469
bool allow_blob_division
Definition: classify.h:382
int classify_class_pruner_multiplier
Definition: classify.h:465
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
bool CompareFontInfo(const FontInfo &fi1, const FontInfo &fi2)
Definition: fontinfo.cpp:120
double speckle_large_max_size
Definition: classify.h:501
double classify_max_norm_scale_x
Definition: classify.h:398
int classify_class_pruner_threshold
Definition: classify.h:463
bool classify_bln_numeric_mode
Definition: classify.h:500
double classify_max_rating_ratio
Definition: classify.h:402
double matcher_reliable_adaptive_result
Definition: classify.h:421
bool classify_save_adapted_templates
Definition: classify.h:413
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
int classify_cp_cutoff_strength
Definition: classify.h:467
double certainty_scale
Definition: classify.h:437
NORM_PROTOS * NormProtos
Definition: classify.h:486
double classify_char_norm_range
Definition: classify.h:396
double speckle_rating_penalty
Definition: classify.h:503
double matcher_avg_noise_size
Definition: classify.h:425

◆ ~Classify()

tesseract::Classify::~Classify ( )
virtual

Definition at line 192 of file classify.cpp.

192  {
194  delete learn_debug_win_;
195  delete learn_fragmented_word_debug_win_;
196  delete learn_fragments_debug_win_;
197  delete[] CharNormCutoffs;
198  delete[] BaselineCutoffs;
199 }
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456

Member Function Documentation

◆ AdaptableWord()

bool tesseract::Classify::AdaptableWord ( WERD_RES * word )

Return TRUE if the specified word is acceptable for adaptation.

Globals: none

Parameters
word  current word
Returns
TRUE or FALSE
Note
Exceptions: none
History: Thu May 30 14:25:06 1991, DSJ, Created.

Definition at line 850 of file adaptmatch.cpp.

850  {
851  if (word->best_choice == NULL) return false;
852  int BestChoiceLength = word->best_choice->length();
853  float adaptable_score =
855  return // rules that apply in general - simplest to compute first
856  BestChoiceLength > 0 &&
857  BestChoiceLength == word->rebuild_word->NumBlobs() &&
858  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
859  // This basically ensures that the word is at least a dictionary match
860  // (freq word, user word, system dawg word, etc).
861  // Since all the other adjustments will make adjust factor higher
862  // than higher than adaptable_score=1.1+0.05=1.15
863  // Since these are other flags that ensure that the word is dict word,
864  // this check could be at times redundant.
865  word->best_choice->adjust_factor() <= adaptable_score &&
866  // Make sure that alternative choices are not dictionary words.
867  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
868 }
TWERD * rebuild_word
Definition: pageres.h:244
float adjust_factor() const
Definition: ratngs.h:304
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:71
WERD_CHOICE * best_choice
Definition: pageres.h:219
Dict & getDict()
Definition: classify.h:65
int length() const
Definition: ratngs.h:301
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:430
int NumBlobs() const
Definition: blobs.h:425
double segment_penalty_dict_case_ok
Definition: dict.h:582
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:73

◆ AdaptiveClassifier()

void tesseract::Classify::AdaptiveClassifier ( TBLOB * Blob,
BLOB_CHOICE_LIST *  Choices 
)

This routine calls the adaptive matcher which returns (in an array) the class id of each class matched.

It also returns the number of classes matched. For each class matched it places the best rating found for that class into the Ratings array.

Bad matches are then removed so that they don't need to be sorted. The remaining good matches are then sorted and converted to choices.

This routine also performs some simple speckle filtering.

Note
Exceptions: none
History: Mon Mar 11 10:00:58 1991, DSJ, Created.
Parameters
Blob           blob to be classified
[out] Choices  List of choices found by adaptive matcher. Filled on return with the choices found by the class pruner and the ratings therefrom. Also contains the detailed results of the integer matcher.

Definition at line 185 of file adaptmatch.cpp.

185  {
186  assert(Choices != NULL);
187  ADAPT_RESULTS *Results = new ADAPT_RESULTS;
188  Results->Initialize();
189 
190  ASSERT_HOST(AdaptedTemplates != NULL);
191 
192  DoAdaptiveMatch(Blob, Results);
193 
194  RemoveBadMatches(Results);
196  RemoveExtraPuncs(Results);
197  Results->ComputeBest();
198  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
199  Choices);
200 
201  // TODO(rays) Move to before ConvertMatchesToChoices!
202  if (LargeSpeckle(*Blob) || Choices->length() == 0)
203  AddLargeSpeckleTo(Results->BlobLength, Choices);
204 
205  if (matcher_debug_level >= 1) {
206  tprintf("AD Matches = ");
207  PrintAdaptiveMatchResults(*Results);
208  }
209 
210 #ifndef GRAPHICS_DISABLED
212  DebugAdaptiveClassifier(Blob, Results);
213 #endif
214 
215  delete Results;
216 } /* AdaptiveClassifier */
void ComputeBest()
Definition: adaptmatch.cpp:99
bool classify_enable_adaptive_debugger
Definition: classify.h:414
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:235
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
TBOX bounding_box() const
Definition: blobs.cpp:482
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
void RemoveBadMatches(ADAPT_RESULTS *Results)
const DENORM & denorm() const
Definition: blobs.h:340
void Initialize()
Definition: adaptmatch.cpp:93
#define tprintf(...)
Definition: tprintf.h:31
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:212
inT32 BlobLength
Definition: adaptmatch.cpp:83
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ AdaptiveClassifierIsEmpty()

bool tesseract::Classify::AdaptiveClassifierIsEmpty ( ) const
inline

Definition at line 285 of file classify.h.

285  {
286  return AdaptedTemplates->NumPermClasses == 0;
287  }
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473

◆ AdaptiveClassifierIsFull()

bool tesseract::Classify::AdaptiveClassifierIsFull ( ) const
inline

Definition at line 284 of file classify.h.

284 { return NumAdaptationsFailed > 0; }

◆ AdaptToChar()

void tesseract::Classify::AdaptToChar ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
FLOAT32  Threshold,
ADAPT_TEMPLATES  adaptive_templates 
)
Parameters
Blob                blob to add to templates for ClassId
ClassId             class to add blob to
FontinfoId          font information from pre-trained templates
Threshold           minimum match rating to existing template
adaptive_templates  current set of adapted templates

Globals:

  • AllProtosOn dummy mask to match against all protos
  • AllConfigsOn dummy mask to match against all configs
Returns
none
Note
Exceptions: none
History: Thu Mar 14 09:36:03 1991, DSJ, Created.

Definition at line 886 of file adaptmatch.cpp.

888  {
889  int NumFeatures;
890  INT_FEATURE_ARRAY IntFeatures;
891  UnicharRating int_result;
892  INT_CLASS IClass;
893  ADAPT_CLASS Class;
894  TEMP_CONFIG TempConfig;
895  FEATURE_SET FloatFeatures;
896  int NewTempConfigId;
897 
898  if (!LegalClassId (ClassId))
899  return;
900 
901  int_result.unichar_id = ClassId;
902  Class = adaptive_templates->Class[ClassId];
903  assert(Class != NULL);
904  if (IsEmptyAdaptedClass(Class)) {
905  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
906  } else {
907  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
908 
909  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
910  if (NumFeatures <= 0) {
911  return; // Features already freed by GetAdaptiveFeatures.
912  }
913 
914  // Only match configs with the matching font.
915  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
916  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
917  if (GetFontinfoId(Class, cfg) == FontinfoId) {
918  SET_BIT(MatchingFontConfigs, cfg);
919  } else {
920  reset_bit(MatchingFontConfigs, cfg);
921  }
922  }
923  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
924  NumFeatures, IntFeatures,
927  FreeBitVector(MatchingFontConfigs);
928 
929  SetAdaptiveThreshold(Threshold);
930 
931  if (1.0f - int_result.rating <= Threshold) {
932  if (ConfigIsPermanent(Class, int_result.config)) {
934  tprintf("Found good match to perm config %d = %4.1f%%.\n",
935  int_result.config, int_result.rating * 100.0);
936  FreeFeatureSet(FloatFeatures);
937  return;
938  }
939 
940  TempConfig = TempConfigFor(Class, int_result.config);
941  IncreaseConfidence(TempConfig);
942  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
943  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
944  }
946  tprintf("Increasing reliability of temp config %d to %d.\n",
947  int_result.config, TempConfig->NumTimesSeen);
948 
949  if (TempConfigReliable(ClassId, TempConfig)) {
950  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
951  UpdateAmbigsGroup(ClassId, Blob);
952  }
953  } else {
955  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
956  int_result.config, int_result.rating * 100.0);
958  DisplayAdaptedChar(Blob, IClass);
959  }
960  NewTempConfigId =
961  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
962  NumFeatures, IntFeatures, FloatFeatures);
963  if (NewTempConfigId >= 0 &&
964  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
965  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
966  UpdateAmbigsGroup(ClassId, Blob);
967  }
968 
969 #ifndef GRAPHICS_DISABLED
971  DisplayAdaptedChar(Blob, IClass);
972  }
973 #endif
974  }
975  FreeFeatureSet(FloatFeatures);
976  }
977 } /* AdaptToChar */
#define NO_DEBUG
Definition: adaptmatch.cpp:70
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:108
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
int classify_adapt_feature_threshold
Definition: classify.h:447
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define SET_BIT(array, bit)
Definition: bitvec.h:57
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
uinT8 NumTimesSeen
Definition: adaptive.h:41
bool matcher_debug_separate_windows
Definition: classify.h:458
BIT_VECTOR AllProtosOn
Definition: classify.h:480
INT_TEMPLATES Templates
Definition: adaptive.h:77
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:979
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:812
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
#define LegalClassId(c)
Definition: intproto.h:179
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define reset_bit(array, bit)
Definition: bitvec.h:59
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:77
uinT8 NumConfigs
Definition: intproto.h:110
IntegerMatcher im_
Definition: classify.h:503
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:717
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ AddLargeSpeckleTo()

void tesseract::Classify::AddLargeSpeckleTo ( int  blob_length,
BLOB_CHOICE_LIST *  choices 
)

Definition at line 212 of file classify.cpp.

212  {
213  BLOB_CHOICE_IT bc_it(choices);
214  // If there is no classifier result, we will use the worst possible certainty
215  // and corresponding rating.
216  float certainty = -getDict().certainty_scale;
217  float rating = rating_scale * blob_length;
218  if (!choices->empty() && blob_length > 0) {
219  bc_it.move_to_last();
220  BLOB_CHOICE* worst_choice = bc_it.data();
221  // Add speckle_rating_penalty to worst rating, matching old value.
222  rating = worst_choice->rating() + speckle_rating_penalty;
223  // Compute the rating to correspond to the certainty. (Used to be kept
224  // the same, but that messes up the language model search.)
225  certainty = -rating * getDict().certainty_scale /
226  (rating_scale * blob_length);
227  }
228  BLOB_CHOICE* blob_choice = new BLOB_CHOICE(UNICHAR_SPACE, rating, certainty,
229  -1, 0.0f, MAX_FLOAT32, 0,
231  bc_it.add_to_end(blob_choice);
232 }
double certainty_scale
Definition: dict.h:609
#define MAX_FLOAT32
Definition: host.h:57
float rating() const
Definition: ratngs.h:79
Dict & getDict()
Definition: classify.h:65
double speckle_rating_penalty
Definition: classify.h:503

◆ AddNewResult()

void tesseract::Classify::AddNewResult ( const UnicharRating &  new_result,
ADAPT_RESULTS *  results 
)

This routine adds the result of a classification into Results. If the new rating is much worse than the current best rating, it is not entered into results because it would end up being stripped later anyway. If the new rating is better than the old rating for the class, it replaces the old rating. If this is the first rating for the class, the class is added to the list of matched classes in Results. If the new rating is better than the best so far, it becomes the best so far.

Globals:

Parameters
new_result: new result to add
[out] results: results to add new result to
Note
Exceptions: none
History: Tue Mar 12 18:19:29 1991, DSJ, Created.

Definition at line 1030 of file adaptmatch.cpp.

1031  {
1032  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
1033 
1034  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
1035  (old_match < results->match.size() &&
1036  new_result.rating <= results->match[old_match].rating))
1037  return; // New one not good enough.
1038 
1039  if (!unicharset.get_fragment(new_result.unichar_id))
1040  results->HasNonfragment = true;
1041 
1042  if (old_match < results->match.size()) {
1043  results->match[old_match].rating = new_result.rating;
1044  } else {
1045  results->match.push_back(new_result);
1046  }
1047 
1048  if (new_result.rating > results->best_rating &&
1049  // Ensure that fragments do not affect best rating, class and config.
1050  // This is needed so that at least one non-fragmented character is
1051  // always present in the results.
1052  // TODO(daria): verify that this helps accuracy and does not
1053  // hurt performance.
1054  !unicharset.get_fragment(new_result.unichar_id)) {
1055  results->best_match_index = old_match;
1056  results->best_rating = new_result.rating;
1057  results->best_unichar_id = new_result.unichar_id;
1058  }
1059 } /* AddNewResult */
bool HasNonfragment
Definition: adaptmatch.cpp:84
int push_back(T object)
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
int best_match_index
Definition: adaptmatch.cpp:86
UNICHARSET unicharset
Definition: ccutil.h:70
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
FLOAT32 best_rating
Definition: adaptmatch.cpp:87

◆ AmbigClassifier()

void tesseract::Classify::AmbigClassifier ( const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
const TBLOB *  blob,
INT_TEMPLATES  templates,
ADAPT_CLASS *  classes,
UNICHAR_ID *  ambiguities,
ADAPT_RESULTS *  results 
)

This routine is identical to CharNormClassifier() except that it does no class pruning. It simply matches the unknown blob against the classes listed in ambiguities.

Globals:

Parameters
blob: blob to be classified
templates: built-in templates to classify against
classes: adapted class templates
ambiguities: array of unichar ids to match against
[out] results: place to put match results
int_features
fx_info
Note
Exceptions: none
History: Tue Mar 12 19:40:36 1991, DSJ, Created.

Definition at line 1084 of file adaptmatch.cpp.

1091  {
1092  if (int_features.empty()) return;
1093  uinT8* CharNormArray = new uinT8[unicharset.size()];
1094  UnicharRating int_result;
1095 
1096  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
1097  CharNormArray);
1098  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1099  if (debug)
1100  tprintf("AM Matches = ");
1101 
1102  int top = blob->bounding_box().top();
1103  int bottom = blob->bounding_box().bottom();
1104  while (*ambiguities >= 0) {
1105  CLASS_ID class_id = *ambiguities;
1106 
1107  int_result.unichar_id = class_id;
1108  im_.Match(ClassForClassId(templates, class_id),
1110  int_features.size(), &int_features[0],
1111  &int_result,
1114 
1115  ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
1116  results->BlobLength,
1118  CharNormArray, &int_result, results);
1119  ambiguities++;
1120  }
1121  delete [] CharNormArray;
1122 } /* AmbigClassifier */
#define NO_DEBUG
Definition: adaptmatch.cpp:70
int size() const
Definition: unicharset.h:297
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
unsigned char uinT8
Definition: host.h:32
TBOX bounding_box() const
Definition: blobs.cpp:482
int classify_adapt_feature_threshold
Definition: classify.h:447
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define ClassForClassId(T, c)
Definition: intproto.h:181
inT16 bottom() const
Definition: rect.h:61
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
bool matcher_debug_separate_windows
Definition: classify.h:458
BIT_VECTOR AllProtosOn
Definition: classify.h:480
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
int classify_integer_matcher_multiplier
Definition: classify.h:469
inT16 top() const
Definition: rect.h:54
inT32 BlobLength
Definition: adaptmatch.cpp:83
UNICHARSET unicharset
Definition: ccutil.h:70
bool empty() const
Definition: genericvector.h:84
IntegerMatcher im_
Definition: classify.h:503

◆ BaselineClassifier()

UNICHAR_ID * tesseract::Classify::BaselineClassifier ( TBLOB *  Blob,
const GenericVector< INT_FEATURE_STRUCT > &  int_features,
const INT_FX_RESULT_STRUCT &  fx_info,
ADAPT_TEMPLATES  Templates,
ADAPT_RESULTS *  Results 
)

This routine extracts baseline normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Globals:

  • BaselineCutoffs expected num features for each class
Parameters
Blob: blob to be classified
Templates: current set of adapted templates
Results: place to put match results
int_features
fx_info
Returns
Array of possible ambiguous chars that should be checked.
Note
Exceptions: none
History: Tue Mar 12 19:38:03 1991, DSJ, Created.

Definition at line 1306 of file adaptmatch.cpp.

1309  {
1310  if (int_features.empty()) return NULL;
1311  uinT8* CharNormArray = new uinT8[unicharset.size()];
1312  ClearCharNormArray(CharNormArray);
1313 
1315  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1316  CharNormArray, BaselineCutoffs, &Results->CPResults);
1317 
1318  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1319  tprintf("BL Matches = ");
1320 
1321  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1322  CharNormArray,
1323  Templates->Class, matcher_debug_flags, 0,
1324  Blob->bounding_box(), Results->CPResults, Results);
1325 
1326  delete [] CharNormArray;
1327  CLASS_ID ClassId = Results->best_unichar_id;
1328  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1329  return NULL;
1330 
1331  return Templates->Class[ClassId]->
1332  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1333 } /* BaselineClassifier */
int IntCastRounded(double x)
Definition: helpers.h:172
int size() const
Definition: unicharset.h:297
void ClearCharNormArray(uinT8 *char_norm_array)
Definition: float2int.cpp:48
CLUSTERCONFIG Config
unsigned char uinT8
Definition: host.h:32
TBOX bounding_box() const
Definition: blobs.cpp:482
const double kStandardFeatureLength
Definition: intfx.h:46
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
INT_TEMPLATES Templates
Definition: adaptive.h:77
int best_match_index
Definition: adaptmatch.cpp:86
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
UNICHARSET unicharset
Definition: ccutil.h:70
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
bool empty() const
Definition: genericvector.h:84
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ CharNormClassifier()

int tesseract::Classify::CharNormClassifier ( TBLOB *  blob,
const TrainingSample &  sample,
ADAPT_RESULTS *  adapt_results 
)

This routine extracts character normalized features from the unknown character and matches them against the specified set of templates. The classes which match are added to Results.

Parameters
blob: blob to be classified
sample: templates to classify unknown against
adapt_results: place to put match results

Globals:

  • CharNormCutoffs expected num features for each class
  • AllProtosOn mask that enables all protos
  • AllConfigsOn mask that enables all configs
Note
Exceptions: none
History: Tue Mar 12 16:02:52 1991, DSJ, Created.

Definition at line 1355 of file adaptmatch.cpp.

1357  {
1358  // This is the length that is used for scaling ratings vs certainty.
1359  adapt_results->BlobLength =
1360  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1361  GenericVector<UnicharRating> unichar_results;
1362  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1363  -1, &unichar_results);
1364  // Convert results to the format used internally by AdaptiveClassifier.
1365  for (int r = 0; r < unichar_results.size(); ++r) {
1366  AddNewResult(unichar_results[r], adapt_results);
1367  }
1368  return sample.num_features();
1369 } /* CharNormClassifier */
Pix * pix() const
Definition: normalis.h:248
int IntCastRounded(double x)
Definition: helpers.h:172
const double kStandardFeatureLength
Definition: intfx.h:46
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
const DENORM & denorm() const
Definition: blobs.h:340
inT32 BlobLength
Definition: adaptmatch.cpp:83
Definition: cluster.h:32
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)

◆ CharNormTrainingSample()

int tesseract::Classify::CharNormTrainingSample ( bool  pruner_only,
int  keep_this,
const TrainingSample &  sample,
GenericVector< UnicharRating > *  results 
)

Definition at line 1373 of file adaptmatch.cpp.

1376  {
1377  results->clear();
1378  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
1379  adapt_results->Initialize();
1380  // Compute the bounding box of the features.
1381  int num_features = sample.num_features();
1382  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1383  // fabricate right and left using top and bottom.
1384  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1385  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1386  // Compute the char_norm_array from the saved cn_feature.
1387  FEATURE norm_feature = sample.GetCNFeature();
1388  uinT8* char_norm_array = new uinT8[unicharset.size()];
1389  int num_pruner_classes = MAX(unicharset.size(),
1391  uinT8* pruner_norm_array = new uinT8[num_pruner_classes];
1392  adapt_results->BlobLength =
1393  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1394  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1395  pruner_norm_array);
1396 
1397  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1398  pruner_norm_array,
1399  shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1400  &adapt_results->CPResults);
1401  delete [] pruner_norm_array;
1402  if (keep_this >= 0) {
1403  adapt_results->CPResults[0].Class = keep_this;
1404  adapt_results->CPResults.truncate(1);
1405  }
1406  if (pruner_only) {
1407  // Convert pruner results to output format.
1408  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1409  int class_id = adapt_results->CPResults[i].Class;
1410  results->push_back(
1411  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1412  }
1413  } else {
1414  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1415  char_norm_array,
1416  NULL, matcher_debug_flags,
1418  blob_box, adapt_results->CPResults, adapt_results);
1419  // Convert master matcher results to output format.
1420  for (int i = 0; i < adapt_results->match.size(); i++) {
1421  results->push_back(adapt_results->match[i]);
1422  }
1424  }
1425  delete [] char_norm_array;
1426  delete adapt_results;
1427  return num_features;
1428 } /* CharNormTrainingSample */
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
int size() const
Definition: unicharset.h:297
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
unsigned char uinT8
Definition: host.h:32
int push_back(T object)
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
FLOAT32 ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:32
void Initialize()
Definition: adaptmatch.cpp:93
void truncate(int size)
#define MAX(x, y)
Definition: ndminx.h:24
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
ShapeTable * shape_table_
Definition: classify.h:512
int size() const
Definition: genericvector.h:72
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
int classify_integer_matcher_multiplier
Definition: classify.h:469
inT32 BlobLength
Definition: adaptmatch.cpp:83
Definition: cluster.h:32
Definition: rect.h:30
UNICHARSET unicharset
Definition: ccutil.h:70
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88

◆ ClassAndConfigIDToFontOrShapeID()

int tesseract::Classify::ClassAndConfigIDToFontOrShapeID ( int  class_id,
int  int_result_config 
) const

Definition at line 2284 of file adaptmatch.cpp.

2285  {
2286  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2287  // Older inttemps have no font_ids.
2288  if (font_set_id < 0)
2289  return kBlankFontinfoId;
2290  const FontSet &fs = fontset_table_.get(font_set_id);
2291  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2292  return fs.configs[int_result_config];
2293 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define ASSERT_HOST(x)
Definition: errcode.h:84
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124

◆ ClassIDToDebugStr()

STRING tesseract::Classify::ClassIDToDebugStr ( const INT_TEMPLATES_STRUCT *  templates,
int  class_id,
int  config_id 
) const

Definition at line 2271 of file adaptmatch.cpp.

2272  {
2273  STRING class_string;
2274  if (templates == PreTrainedTemplates && shape_table_ != NULL) {
2275  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2276  class_string = shape_table_->DebugStr(shape_id);
2277  } else {
2278  class_string = unicharset.debug_str(class_id);
2279  }
2280  return class_string;
2281 }
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
Definition: strngs.h:44
ShapeTable * shape_table_
Definition: classify.h:512
UNICHARSET unicharset
Definition: ccutil.h:70
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291

◆ ClassifyAsNoise()

void tesseract::Classify::ClassifyAsNoise ( ADAPT_RESULTS *  results)

This routine computes a rating which reflects the likelihood that the blob being classified is a noise blob. NOTE: assumes that the blob length has already been computed and placed into Results.

Parameters
results: results to add noise classification to

Globals:

  • matcher_avg_noise_size avg. length of a noise blob
Note
Exceptions: none
History: Tue Mar 12 18:36:52 1991, DSJ, Created.

Definition at line 1446 of file adaptmatch.cpp.

1446  {
1447  float rating = results->BlobLength / matcher_avg_noise_size;
1448  rating *= rating;
1449  rating /= 1.0 + rating;
1450 
1451  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1452 } /* ClassifyAsNoise */
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
inT32 BlobLength
Definition: adaptmatch.cpp:83
double matcher_avg_noise_size
Definition: classify.h:425

◆ ClearCharNormArray()

void tesseract::Classify::ClearCharNormArray ( uinT8 *  char_norm_array)

For each class in the unicharset, clears the corresponding entry in char_norm_array. char_norm_array is indexed by unichar_id.

Globals:

  • none
Parameters
char_norm_array: array to be cleared
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 48 of file float2int.cpp.

48  {
49  memset(char_norm_array, 0, sizeof(*char_norm_array) * unicharset.size());
50 } /* ClearCharNormArray */
int size() const
Definition: unicharset.h:297
UNICHARSET unicharset
Definition: ccutil.h:70

◆ ComputeCharNormArrays()

void tesseract::Classify::ComputeCharNormArrays ( FEATURE_STRUCT *  norm_feature,
INT_TEMPLATES_STRUCT *  templates,
uinT8 *  char_norm_array,
uinT8 *  pruner_array 
)

Definition at line 1748 of file adaptmatch.cpp.

1751  {
1752  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1753  if (pruner_array != NULL) {
1754  if (shape_table_ == NULL) {
1755  ComputeIntCharNormArray(*norm_feature, pruner_array);
1756  } else {
1757  memset(pruner_array, MAX_UINT8,
1758  templates->NumClasses * sizeof(pruner_array[0]));
1759  // Each entry in the pruner norm array is the MIN of all the entries of
1760  // the corresponding unichars in the CharNormArray.
1761  for (int id = 0; id < templates->NumClasses; ++id) {
1762  int font_set_id = templates->Class[id]->font_set_id;
1763  const FontSet &fs = fontset_table_.get(font_set_id);
1764  for (int config = 0; config < fs.size; ++config) {
1765  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1766  for (int c = 0; c < shape.size(); ++c) {
1767  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1768  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1769  }
1770  }
1771  }
1772  }
1773  }
1774  FreeFeature(norm_feature);
1775 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:60
#define MAX_UINT8
Definition: host.h:54
ShapeTable * shape_table_
Definition: classify.h:512
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
Definition: float2int.cpp:69
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124

◆ ComputeCorrectedRating()

double tesseract::Classify::ComputeCorrectedRating ( bool  debug,
int  unichar_id,
double  cp_rating,
double  im_rating,
int  feature_misses,
int  bottom,
int  top,
int  blob_length,
int  matcher_multiplier,
const uinT8 *  cn_factors 
)

Definition at line 1241 of file adaptmatch.cpp.

1246  {
1247  // Compute class feature corrections.
1248  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1249  cn_factors[unichar_id],
1250  matcher_multiplier);
1251  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1252  double vertical_penalty = 0.0;
1253  // Penalize non-alnums for being vertical misfits.
1254  if (!unicharset.get_isalpha(unichar_id) &&
1255  !unicharset.get_isdigit(unichar_id) &&
1256  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1257  int min_bottom, max_bottom, min_top, max_top;
1258  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1259  &min_top, &max_top);
1260  if (debug) {
1261  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1262  top, min_top, max_top, bottom, min_bottom, max_bottom);
1263  }
1264  if (top < min_top || top > max_top ||
1265  bottom < min_bottom || bottom > max_bottom) {
1266  vertical_penalty = classify_misfit_junk_penalty;
1267  }
1268  }
1269  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1270  if (result < WORST_POSSIBLE_RATING)
1271  result = WORST_POSSIBLE_RATING;
1272  if (debug) {
1273  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1274  unicharset.id_to_unichar(unichar_id),
1275  result * 100.0,
1276  cp_rating * 100.0,
1277  (1.0 - im_rating) * 100.0,
1278  (cn_corrected - (1.0 - im_rating)) * 100.0,
1279  cn_factors[unichar_id],
1280  miss_penalty * 100.0,
1281  vertical_penalty * 100.0);
1282  }
1283  return result;
1284 }
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:526
double tessedit_class_miss_scale
Definition: classify.h:439
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
double classify_misfit_junk_penalty
Definition: classify.h:435
#define tprintf(...)
Definition: tprintf.h:31
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:470
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:77
IntegerMatcher im_
Definition: classify.h:503

◆ ComputeIntCharNormArray()

void tesseract::Classify::ComputeIntCharNormArray ( const FEATURE_STRUCT &  norm_feature,
uinT8 *  char_norm_array 
)

For each class in unicharset, computes the match between norm_feature and the normalization protos for that class. Converts this number to the range from 0 - 255 and stores it into char_norm_array. CharNormArray is indexed by unichar_id.

Globals:

  • PreTrainedTemplates current set of built-in templates
Parameters
norm_feature: character normalization feature
[out] char_norm_array: place to put results of size unicharset.size()
Note
Exceptions: none
History: Wed Feb 20 11:20:54 1991, DSJ, Created.

Definition at line 69 of file float2int.cpp.

70  {
71  for (int i = 0; i < unicharset.size(); i++) {
72  if (i < PreTrainedTemplates->NumClasses) {
73  int norm_adjust = static_cast<int>(INT_CHAR_NORM_RANGE *
74  ComputeNormMatch(i, norm_feature, FALSE));
75  char_norm_array[i] = ClipToRange(norm_adjust, 0, MAX_INT_CHAR_NORM);
76  } else {
77  // Classes with no templates (eg. ambigs & ligatures) default
78  // to worst match.
79  char_norm_array[i] = MAX_INT_CHAR_NORM;
80  }
81  }
82 } /* ComputeIntCharNormArray */
#define MAX_INT_CHAR_NORM
Definition: float2int.cpp:28
int size() const
Definition: unicharset.h:297
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
#define FALSE
Definition: capi.h:46
FLOAT32 ComputeNormMatch(CLASS_ID ClassId, const FEATURE_STRUCT &feature, BOOL8 DebugMatch)
Definition: normmatch.cpp:88
#define INT_CHAR_NORM_RANGE
Definition: intproto.h:133
UNICHARSET unicharset
Definition: ccutil.h:70

◆ ComputeIntFeatures()

void tesseract::Classify::ComputeIntFeatures ( FEATURE_SET  Features,
INT_FEATURE_ARRAY  IntFeatures 
)

This routine converts each floating point pico-feature in Features into integer format and saves it into IntFeatures.

Globals:

  • none
Parameters
Features: floating point pico-features to be converted
[out] IntFeatures: array to put converted features into
Note
Exceptions: none
History: Wed Feb 20 10:58:45 1991, DSJ, Created.

Definition at line 100 of file float2int.cpp.

101  {
102  int Fid;
103  FEATURE Feature;
104  FLOAT32 YShift;
105 
107  YShift = BASELINE_Y_SHIFT;
108  else
109  YShift = Y_SHIFT;
110 
111  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
112  Feature = Features->Features[Fid];
113 
114  IntFeatures[Fid].X =
116  IntFeatures[Fid].Y =
117  Bucket8For(Feature->Params[PicoFeatY], YShift, INT_FEAT_RANGE);
118  IntFeatures[Fid].Theta = CircBucketFor(Feature->Params[PicoFeatDir],
120  IntFeatures[Fid].CP_misses = 0;
121  }
122 } /* ComputeIntFeatures */
#define ANGLE_SHIFT
Definition: intproto.h:39
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define Y_SHIFT
Definition: intproto.h:41
#define INT_FEAT_RANGE
Definition: float2int.h:27
uinT8 Bucket8For(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:435
float FLOAT32
Definition: host.h:44
#define BASELINE_Y_SHIFT
Definition: float2int.h:28
uinT8 CircBucketFor(FLOAT32 param, FLOAT32 offset, int num_buckets)
Definition: intproto.cpp:449
FEATURE Features[1]
Definition: ocrfeatures.h:72
#define X_SHIFT
Definition: intproto.h:40

◆ ComputeNormMatch()

FLOAT32 tesseract::Classify::ComputeNormMatch ( CLASS_ID  ClassId,
const FEATURE_STRUCT &  feature,
BOOL8  DebugMatch 
)

This routine compares Features against each character normalization proto for ClassId and returns the match rating of the best match.

Parameters
ClassId: id of class to match against
feature: character normalization feature
DebugMatch: controls dump of debug info

Globals: NormProtos character normalization prototypes

Returns
Best match rating for Feature against protos of ClassId.
Note
Exceptions: none
History: Wed Dec 19 16:56:12 1990, DSJ, Created.

Definition at line 88 of file normmatch.cpp.

90  {
91  LIST Protos;
92  FLOAT32 BestMatch;
93  FLOAT32 Match;
94  FLOAT32 Delta;
95  PROTOTYPE *Proto;
96  int ProtoId;
97 
98  if (ClassId >= NormProtos->NumProtos) {
99  ClassId = NO_CLASS;
100  }
101 
102  /* handle requests for classification as noise */
103  if (ClassId == NO_CLASS) {
104  /* kludge - clean up constants and make into control knobs later */
105  Match = (feature.Params[CharNormLength] *
106  feature.Params[CharNormLength] * 500.0 +
107  feature.Params[CharNormRx] *
108  feature.Params[CharNormRx] * 8000.0 +
109  feature.Params[CharNormRy] *
110  feature.Params[CharNormRy] * 8000.0);
111  return (1.0 - NormEvidenceOf (Match));
112  }
113 
114  BestMatch = MAX_FLOAT32;
115  Protos = NormProtos->Protos[ClassId];
116 
117  if (DebugMatch) {
118  tprintf("\nChar norm for class %s\n", unicharset.id_to_unichar(ClassId));
119  }
120 
121  ProtoId = 0;
122  iterate(Protos) {
123  Proto = (PROTOTYPE *) first_node (Protos);
124  Delta = feature.Params[CharNormY] - Proto->Mean[CharNormY];
125  Match = Delta * Delta * Proto->Weight.Elliptical[CharNormY];
126  if (DebugMatch) {
127  tprintf("YMiddle: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
128  Proto->Mean[CharNormY], Delta,
129  Proto->Weight.Elliptical[CharNormY], Match);
130  }
131  Delta = feature.Params[CharNormRx] - Proto->Mean[CharNormRx];
132  Match += Delta * Delta * Proto->Weight.Elliptical[CharNormRx];
133  if (DebugMatch) {
134  tprintf("Height: Proto=%g, Delta=%g, Var=%g, Dist=%g\n",
135  Proto->Mean[CharNormRx], Delta,
136  Proto->Weight.Elliptical[CharNormRx], Match);
137  }
138  // Ry is width! See intfx.cpp.
139  Delta = feature.Params[CharNormRy] - Proto->Mean[CharNormRy];
140  if (DebugMatch) {
141  tprintf("Width: Proto=%g, Delta=%g, Var=%g\n",
142  Proto->Mean[CharNormRy], Delta,
143  Proto->Weight.Elliptical[CharNormRy]);
144  }
145  Delta = Delta * Delta * Proto->Weight.Elliptical[CharNormRy];
146  Delta *= kWidthErrorWeighting;
147  Match += Delta;
148  if (DebugMatch) {
149  tprintf("Total Dist=%g, scaled=%g, sigmoid=%g, penalty=%g\n",
150  Match, Match / classify_norm_adj_midpoint,
151  NormEvidenceOf(Match), 256 * (1 - NormEvidenceOf(Match)));
152  }
153 
154  if (Match < BestMatch)
155  BestMatch = Match;
156 
157  ProtoId++;
158  }
159  return 1.0 - NormEvidenceOf(BestMatch);
160 } /* ComputeNormMatch */
#define first_node(l)
Definition: oldlist.h:139
const double kWidthErrorWeighting
Definition: normmatch.cpp:66
LIST * Protos
Definition: normmatch.cpp:42
double classify_norm_adj_midpoint
Definition: normmatch.cpp:63
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define MAX_FLOAT32
Definition: host.h:57
float FLOAT32
Definition: host.h:44
#define NO_CLASS
Definition: matchdefs.h:36
FLOAT32 * Mean
Definition: cluster.h:78
#define tprintf(...)
Definition: tprintf.h:31
double NormEvidenceOf(register double NormAdj)
Definition: normmatch.cpp:184
#define iterate(l)
Definition: oldlist.h:159
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
NORM_PROTOS * NormProtos
Definition: classify.h:486
FLOATUNION Weight
Definition: cluster.h:83
FLOAT32 * Elliptical
Definition: cluster.h:64

◆ ConvertMatchesToChoices()

void tesseract::Classify::ConvertMatchesToChoices ( const DENORM denorm,
const TBOX box,
ADAPT_RESULTS Results,
BLOB_CHOICE_LIST *  Choices 
)

The function converts the given match ratings to the list of blob choices with ratings and certainties (used by the context checkers). If character fragments are present in the results, this function also makes sure that there is at least one non-fragmented classification included. For each classification result, it checks the unicharset for "definite" ambiguities and modifies the resulting Choices accordingly.

Definition at line 1460 of file adaptmatch.cpp.

1462  {
1463  assert(Choices != NULL);
1464  FLOAT32 Rating;
1465  FLOAT32 Certainty;
1466  BLOB_CHOICE_IT temp_it;
1467  bool contains_nonfrag = false;
1468  temp_it.set_to_list(Choices);
1469  int choices_length = 0;
1470  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1471  // number of returned results, but with a shape_table_ we want to have room
1472  // for at least the biggest shape (which might contain hundreds of Indic
1473  // grapheme fragments) and more, so use double the size of the biggest shape
1474  // if that is more than the default.
1475  int max_matches = MAX_MATCHES;
1476  if (shape_table_ != NULL) {
1477  max_matches = shape_table_->MaxNumUnichars() * 2;
1478  if (max_matches < MAX_MATCHES)
1479  max_matches = MAX_MATCHES;
1480  }
1481 
1482  float best_certainty = -MAX_FLOAT32;
1483  for (int i = 0; i < Results->match.size(); i++) {
1484  const UnicharRating& result = Results->match[i];
1485  bool adapted = result.adapted;
1486  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != NULL);
1487  if (temp_it.length()+1 == max_matches &&
1488  !contains_nonfrag && current_is_frag) {
1489  continue; // look for a non-fragmented character to fill the
1490  // last spot in Choices if only fragments are present
1491  }
1492  // BlobLength can never be legally 0, this means recognition failed.
1493  // But we must return a classification result because some invoking
1494  // functions (chopper/permuter) do not anticipate a null blob choice.
1495  // So we need to assign a poor, but not infinitely bad score.
1496  if (Results->BlobLength == 0) {
1497  Certainty = -20;
1498  Rating = 100; // should be -certainty * real_blob_length
1499  } else {
1500  Rating = Certainty = (1.0f - result.rating);
1501  Rating *= rating_scale * Results->BlobLength;
1502  Certainty *= -(getDict().certainty_scale);
1503  }
1504  // Adapted results, by their very nature, should have good certainty.
1505  // Those that don't are at best misleading, and often lead to errors,
1506  // so don't accept adapted results that are too far behind the best result,
1507  // whether adapted or static.
1508  // TODO(rays) find some way of automatically tuning these constants.
1509  if (Certainty > best_certainty) {
1510  best_certainty = MIN(Certainty, classify_adapted_pruning_threshold);
1511  } else if (adapted &&
1512  Certainty / classify_adapted_pruning_factor < best_certainty) {
1513  continue; // Don't accept bad adapted results.
1514  }
1515 
1516  float min_xheight, max_xheight, yshift;
1517  denorm.XHeightRange(result.unichar_id, unicharset, box,
1518  &min_xheight, &max_xheight, &yshift);
1519  BLOB_CHOICE* choice =
1520  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1522  min_xheight, max_xheight, yshift,
1523  adapted ? BCC_ADAPTED_CLASSIFIER
1525  choice->set_fonts(result.fonts);
1526  temp_it.add_to_end(choice);
1527  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1528  choices_length++;
1529  if (choices_length >= max_matches) break;
1530  }
1531  Results->match.truncate(choices_length);
1532 } // ConvertMatchesToChoices
double certainty_scale
Definition: dict.h:609
#define MIN(x, y)
Definition: ndminx.h:28
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
#define MAX_MATCHES
Definition: adaptmatch.cpp:68
#define MAX_FLOAT32
Definition: host.h:57
double classify_adapted_pruning_factor
Definition: classify.h:441
float FLOAT32
Definition: host.h:44
Dict & getDict()
Definition: classify.h:65
void truncate(int size)
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:611
ShapeTable * shape_table_
Definition: classify.h:512
int size() const
Definition: genericvector.h:72
double classify_adapted_pruning_threshold
Definition: classify.h:443
inT32 BlobLength
Definition: adaptmatch.cpp:83
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:428
UNICHARSET unicharset
Definition: ccutil.h:70
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
int MaxNumUnichars() const
Definition: shapetable.cpp:465
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682

◆ ConvertProto()

void tesseract::Classify::ConvertProto ( PROTO  Proto,
int  ProtoId,
INT_CLASS  Class 
)

This routine converts Proto to integer format and installs it as ProtoId in Class.

Parameters
Proto: floating-pt proto to be converted to integer format
ProtoId: id of proto
Class: integer class to add converted proto to
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Feb 8 11:22:43 1991, DSJ, Created.

Definition at line 516 of file intproto.cpp.

516  {
517  INT_PROTO P;
518  FLOAT32 Param;
519 
520  assert(ProtoId < Class->NumProtos);
521 
522  P = ProtoForProtoId(Class, ProtoId);
523 
524  Param = Proto->A * 128;
525  P->A = TruncateParam(Param, -128, 127, NULL);
526 
527  Param = -Proto->B * 256;
528  P->B = TruncateParam(Param, 0, 255, NULL);
529 
530  Param = Proto->C * 128;
531  P->C = TruncateParam(Param, -128, 127, NULL);
532 
533  Param = Proto->Angle * 256;
534  if (Param < 0 || Param >= 256)
535  P->Angle = 0;
536  else
537  P->Angle = (uinT8) Param;
538 
539  /* round proto length to nearest integer number of pico-features */
540  Param = (Proto->Length / GetPicoFeatureLength()) + 0.5;
541  Class->ProtoLengths[ProtoId] = TruncateParam(Param, 1, 255, NULL);
543  cprintf("Converted ffeat to (A=%d,B=%d,C=%d,L=%d)",
544  P->A, P->B, P->C, Class->ProtoLengths[ProtoId]);
545 } /* ConvertProto */
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define ProtoForProtoId(C, P)
Definition: intproto.h:171
unsigned char uinT8
Definition: host.h:32
FLOAT32 Angle
Definition: protos.h:49
FLOAT32 Length
Definition: protos.h:50
FLOAT32 C
Definition: protos.h:46
FLOAT32 A
Definition: protos.h:44
float FLOAT32
Definition: host.h:44
int classify_learning_debug_level
Definition: classify.h:419
int TruncateParam(FLOAT32 Param, int Min, int Max, char *Id)
Definition: intproto.cpp:1858
#define GetPicoFeatureLength()
Definition: picofeat.h:59
uinT8 * ProtoLengths
Definition: intproto.h:112
FLOAT32 B
Definition: protos.h:45

◆ CreateIntTemplates()

INT_TEMPLATES tesseract::Classify::CreateIntTemplates ( CLASSES  FloatProtos,
const UNICHARSET target_unicharset 
)

This routine converts from the old floating point format to the new integer format.

Parameters
FloatProtos: prototypes in old floating pt format
target_unicharset: the UNICHARSET to use
Returns
New set of training templates in integer format.
Note
Globals: none
Exceptions: none
History: Thu Feb 7 14:40:42 1991, DSJ, Created.

Definition at line 557 of file intproto.cpp.

559  {
560  INT_TEMPLATES IntTemplates;
561  CLASS_TYPE FClass;
562  INT_CLASS IClass;
563  int ClassId;
564  int ProtoId;
565  int ConfigId;
566 
567  IntTemplates = NewIntTemplates();
568 
569  for (ClassId = 0; ClassId < target_unicharset.size(); ClassId++) {
570  FClass = &(FloatProtos[ClassId]);
571  if (FClass->NumProtos == 0 && FClass->NumConfigs == 0 &&
572  strcmp(target_unicharset.id_to_unichar(ClassId), " ") != 0) {
573  cprintf("Warning: no protos/configs for %s in CreateIntTemplates()\n",
574  target_unicharset.id_to_unichar(ClassId));
575  }
576  assert(UnusedClassIdIn(IntTemplates, ClassId));
577  IClass = NewIntClass(FClass->NumProtos, FClass->NumConfigs);
578  FontSet fs;
579  fs.size = FClass->font_set.size();
580  fs.configs = new int[fs.size];
581  for (int i = 0; i < fs.size; ++i) {
582  fs.configs[i] = FClass->font_set.get(i);
583  }
584  if (this->fontset_table_.contains(fs)) {
585  IClass->font_set_id = this->fontset_table_.get_id(fs);
586  delete[] fs.configs;
587  } else {
588  IClass->font_set_id = this->fontset_table_.push_back(fs);
589  }
590  AddIntClass(IntTemplates, ClassId, IClass);
591 
592  for (ProtoId = 0; ProtoId < FClass->NumProtos; ProtoId++) {
593  AddIntProto(IClass);
594  ConvertProto(ProtoIn(FClass, ProtoId), ProtoId, IClass);
595  AddProtoToProtoPruner(ProtoIn(FClass, ProtoId), ProtoId, IClass,
597  AddProtoToClassPruner(ProtoIn(FClass, ProtoId), ClassId, IntTemplates);
598  }
599 
600  for (ConfigId = 0; ConfigId < FClass->NumConfigs; ConfigId++) {
601  AddIntConfig(IClass);
602  ConvertConfig(FClass->Configurations[ConfigId], ConfigId, IClass);
603  }
604  }
605  return (IntTemplates);
606 } /* CreateIntTemplates */
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:238
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
int32_t * configs
Definition: fontinfo.h:141
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
CONFIGS Configurations
Definition: protos.h:64
inT16 NumProtos
Definition: protos.h:59
int size() const
Definition: unicharset.h:297
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:342
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
UnicityTableEqEq< int > font_set
Definition: protos.h:65
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
int size() const
Return the size used.
inT16 NumConfigs
Definition: protos.h:62
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:664
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
int classify_learning_debug_level
Definition: classify.h:419
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
#define ProtoIn(Class, Pid)
Definition: protos.h:123
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
const T & get(int id) const
Return the object from an id.
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266

◆ DebugAdaptiveClassifier()

void tesseract::Classify::DebugAdaptiveClassifier ( TBLOB blob,
ADAPT_RESULTS Results 
)
Parameters
blob: blob whose classification is being debugged
Results: results of match being debugged

Globals: none

Note
Exceptions: none
History: Wed Mar 13 16:44:41 1991, DSJ, Created.

Definition at line 1547 of file adaptmatch.cpp.

1548  {
1549  if (static_classifier_ == NULL) return;
1550  INT_FX_RESULT_STRUCT fx_info;
1552  TrainingSample* sample =
1553  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1554  if (sample == NULL) return;
1555  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1556  Results->best_unichar_id);
1557 } /* DebugAdaptiveClassifier */
Pix * pix() const
Definition: normalis.h:248
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
const DENORM & denorm() const
Definition: blobs.h:340
Definition: cluster.h:32

◆ DisplayAdaptedChar()

void tesseract::Classify::DisplayAdaptedChar ( TBLOB blob,
INT_CLASS_STRUCT int_class 
)

Definition at line 979 of file adaptmatch.cpp.

979  {
980 #ifndef GRAPHICS_DISABLED
981  INT_FX_RESULT_STRUCT fx_info;
983  TrainingSample* sample =
985  &bl_features);
986  if (sample == NULL) return;
987 
988  UnicharRating int_result;
989  im_.Match(int_class, AllProtosOn, AllConfigsOn,
990  bl_features.size(), &bl_features[0],
993  tprintf("Best match to temp config %d = %4.1f%%.\n",
994  int_result.config, int_result.rating * 100.0);
996  uinT32 ConfigMask;
997  ConfigMask = 1 << int_result.config;
999  im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
1000  bl_features.size(), &bl_features[0],
1001  &int_result, classify_adapt_feature_threshold,
1002  6 | 0x19, matcher_debug_separate_windows);
1004  }
1005 
1006  delete sample;
1007 #endif
1008 }
#define NO_DEBUG
Definition: adaptmatch.cpp:70
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
int classify_adapt_feature_threshold
Definition: classify.h:447
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
void UpdateMatchDisplay()
Definition: intproto.cpp:467
bool matcher_debug_separate_windows
Definition: classify.h:458
BIT_VECTOR AllProtosOn
Definition: classify.h:480
bool classify_nonlinear_norm
Definition: classify.h:416
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
int size() const
Definition: genericvector.h:72
Definition: cluster.h:32
unsigned int uinT32
Definition: host.h:36
IntegerMatcher im_
Definition: classify.h:503

◆ DoAdaptiveMatch()

void tesseract::Classify::DoAdaptiveMatch ( TBLOB Blob,
ADAPT_RESULTS Results 
)

This routine performs an adaptive classification. If we have not yet adapted to enough classes, a simple classification to the pre-trained templates is performed. Otherwise, we match the blob against the adapted templates. If the adapted templates do not match well, we try a match against the pre-trained templates. If an adapted template match is found, we do a match to any pre-trained templates which could be ambiguous. The results from all of these classifications are merged together into Results.

Parameters
Blob: blob to be classified
Results: place to put match results

Globals:

  • PreTrainedTemplates built-in training templates
  • AdaptedTemplates templates adapted for this page
  • matcher_reliable_adaptive_result rating limit for a great match
Note
Exceptions: none
History: Tue Mar 12 08:50:11 1991, DSJ, Created.

Definition at line 1583 of file adaptmatch.cpp.

1583  {
1584  UNICHAR_ID *Ambiguities;
1585 
1586  INT_FX_RESULT_STRUCT fx_info;
1588  TrainingSample* sample =
1590  &bl_features);
1591  if (sample == NULL) return;
1592 
1594  tess_cn_matching) {
1595  CharNormClassifier(Blob, *sample, Results);
1596  } else {
1597  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1598  AdaptedTemplates, Results);
1599  if ((!Results->match.empty() &&
1600  MarginalMatch(Results->best_rating,
1602  !tess_bn_matching) ||
1603  Results->match.empty()) {
1604  CharNormClassifier(Blob, *sample, Results);
1605  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1606  AmbigClassifier(bl_features, fx_info, Blob,
1609  Ambiguities,
1610  Results);
1611  }
1612  }
1613 
1614  // Force the blob to be classified as noise
1615  // if the results contain only fragments.
1616  // TODO(daria): verify that this is better than
1617  // just adding a NULL classification.
1618  if (!Results->HasNonfragment || Results->match.empty())
1619  ClassifyAsNoise(Results);
1620  delete sample;
1621 } /* DoAdaptiveMatch */
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool HasNonfragment
Definition: adaptmatch.cpp:84
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
int matcher_permanent_classes_min
Definition: classify.h:426
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
bool classify_nonlinear_norm
Definition: classify.h:416
Definition: cluster.h:32
double matcher_reliable_adaptive_result
Definition: classify.h:421
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
bool empty() const
Definition: genericvector.h:84
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
void ClassifyAsNoise(ADAPT_RESULTS *Results)
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:122
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
int UNICHAR_ID
Definition: unichar.h:33
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)

◆ EndAdaptiveClassifier()

void tesseract::Classify::EndAdaptiveClassifier ( )

This routine performs cleanup operations on the adaptive classifier. It should be called before the program is terminated. Its main function is to save the adapted templates to a file.

Globals:

Note
Exceptions: none
History: Tue Mar 19 14:37:06 1991, DSJ, Created.

Definition at line 456 of file adaptmatch.cpp.

456  {
457  STRING Filename;
458  FILE *File;
459 
460  if (AdaptedTemplates != NULL &&
462  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
463  File = fopen (Filename.string(), "wb");
464  if (File == NULL)
465  cprintf ("Unable to save adapted templates to %s!\n", Filename.string());
466  else {
467  cprintf ("\nSaving adapted templates to %s ...", Filename.string());
468  fflush(stdout);
470  cprintf ("\n");
471  fclose(File);
472  }
473  }
474 
475  if (AdaptedTemplates != NULL) {
477  AdaptedTemplates = NULL;
478  }
479  if (BackupAdaptedTemplates != NULL) {
481  BackupAdaptedTemplates = NULL;
482  }
483 
484  if (PreTrainedTemplates != NULL) {
486  PreTrainedTemplates = NULL;
487  }
489  FreeNormProtos();
490  if (AllProtosOn != NULL) {
495  AllProtosOn = NULL;
496  AllConfigsOn = NULL;
497  AllConfigsOff = NULL;
498  TempProtoMask = NULL;
499  }
500  delete shape_table_;
501  shape_table_ = NULL;
502  if (static_classifier_ != NULL) {
503  delete static_classifier_;
504  static_classifier_ = NULL;
505  }
506 } /* EndAdaptiveClassifier */
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
bool classify_enable_adaptive_matcher
Definition: classify.h:409
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
STRING imagefile
Definition: ccutil.h:72
BIT_VECTOR TempProtoMask
Definition: classify.h:483
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
void EndDangerousAmbigs()
Definition: stopper.cpp:367
const char * string() const
Definition: strngs.cpp:201
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
BIT_VECTOR AllProtosOn
Definition: classify.h:480
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:739
Dict & getDict()
Definition: classify.h:65
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
Definition: strngs.h:44
ShapeTable * shape_table_
Definition: classify.h:512
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
bool classify_save_adapted_templates
Definition: classify.h:413
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:505

◆ ExpandShapesAndApplyCorrections()

void tesseract::Classify::ExpandShapesAndApplyCorrections ( ADAPT_CLASS classes,
bool  debug,
int  class_id,
int  bottom,
int  top,
float  cp_rating,
int  blob_length,
int  matcher_multiplier,
const uinT8 cn_factors,
UnicharRating int_result,
ADAPT_RESULTS final_results 
)

Definition at line 1167 of file adaptmatch.cpp.

1171  {
1172  if (classes != NULL) {
1173  // Adapted result. Convert configs to fontinfo_ids.
1174  int_result->adapted = true;
1175  for (int f = 0; f < int_result->fonts.size(); ++f) {
1176  int_result->fonts[f].fontinfo_id =
1177  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1178  }
1179  } else {
1180  // Pre-trained result. Map fonts using font_sets_.
1181  int_result->adapted = false;
1182  for (int f = 0; f < int_result->fonts.size(); ++f) {
1183  int_result->fonts[f].fontinfo_id =
1185  int_result->fonts[f].fontinfo_id);
1186  }
1187  if (shape_table_ != NULL) {
1188  // Two possible cases:
1189  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1190  // int_result->fonts are the same. In this case build a new vector of
1191  // mapped fonts and replace the fonts in int_result.
1192  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1193  // by int_result. In this case, build a vector of UnicharRating to
1194  // gather together different font-ids for each unichar. Also covers case1.
1195  GenericVector<UnicharRating> mapped_results;
1196  for (int f = 0; f < int_result->fonts.size(); ++f) {
1197  int shape_id = int_result->fonts[f].fontinfo_id;
1198  const Shape& shape = shape_table_->GetShape(shape_id);
1199  for (int c = 0; c < shape.size(); ++c) {
1200  int unichar_id = shape[c].unichar_id;
1201  if (!unicharset.get_enabled(unichar_id)) continue;
1202  // Find the mapped_result for unichar_id.
1203  int r = 0;
1204  for (r = 0; r < mapped_results.size() &&
1205  mapped_results[r].unichar_id != unichar_id; ++r) {}
1206  if (r == mapped_results.size()) {
1207  mapped_results.push_back(*int_result);
1208  mapped_results[r].unichar_id = unichar_id;
1209  mapped_results[r].fonts.truncate(0);
1210  }
1211  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1212  mapped_results[r].fonts.push_back(
1213  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1214  }
1215  }
1216  }
1217  for (int m = 0; m < mapped_results.size(); ++m) {
1218  mapped_results[m].rating =
1219  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1220  cp_rating, int_result->rating,
1221  int_result->feature_misses, bottom, top,
1222  blob_length, matcher_multiplier, cn_factors);
1223  AddNewResult(mapped_results[m], final_results);
1224  }
1225  return;
1226  }
1227  }
1228  if (unicharset.get_enabled(class_id)) {
1229  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1230  int_result->rating,
1231  int_result->feature_misses,
1232  bottom, top, blob_length,
1233  matcher_multiplier, cn_factors);
1234  AddNewResult(*int_result, final_results);
1235  }
1236 }
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
int push_back(T object)
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
void truncate(int size)
ShapeTable * shape_table_
Definition: classify.h:512
int size() const
Definition: genericvector.h:72
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
UNICHARSET unicharset
Definition: ccutil.h:70
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826

◆ ExtractFeatures()

void tesseract::Classify::ExtractFeatures ( const TBLOB blob,
bool  nonlinear_norm,
GenericVector< INT_FEATURE_STRUCT > *  bl_features,
GenericVector< INT_FEATURE_STRUCT > *  cn_features,
INT_FX_RESULT_STRUCT results,
GenericVector< int > *  outline_cn_counts 
)
static

Definition at line 445 of file intfx.cpp.

450  {
451  DENORM bl_denorm, cn_denorm;
452  tesseract::Classify::SetupBLCNDenorms(blob, nonlinear_norm,
453  &bl_denorm, &cn_denorm, results);
454  if (outline_cn_counts != NULL)
455  outline_cn_counts->truncate(0);
456  // Iterate the outlines.
457  for (TESSLINE* ol = blob.outlines; ol != NULL; ol = ol->next) {
458  // Iterate the polygon.
459  EDGEPT* loop_pt = ol->FindBestStartPt();
460  EDGEPT* pt = loop_pt;
461  if (pt == NULL) continue;
462  do {
463  if (pt->IsHidden()) continue;
464  // Find a run of equal src_outline.
465  EDGEPT* last_pt = pt;
466  do {
467  last_pt = last_pt->next;
468  } while (last_pt != loop_pt && !last_pt->IsHidden() &&
469  last_pt->src_outline == pt->src_outline);
470  last_pt = last_pt->prev;
471  // Until the adaptive classifier can be weaned off polygon segments,
472  // we have to force extraction from the polygon for the bl_features.
473  ExtractFeaturesFromRun(pt, last_pt, bl_denorm, kStandardFeatureLength,
474  true, bl_features);
475  ExtractFeaturesFromRun(pt, last_pt, cn_denorm, kStandardFeatureLength,
476  false, cn_features);
477  pt = last_pt;
478  } while ((pt = pt->next) != loop_pt);
479  if (outline_cn_counts != NULL)
480  outline_cn_counts->push_back(cn_features->size());
481  }
482  results->NumBL = bl_features->size();
483  results->NumCN = cn_features->size();
484  results->YBottom = blob.bounding_box().bottom();
485  results->YTop = blob.bounding_box().top();
486  results->Width = blob.bounding_box().width();
487 }
Definition: blobs.h:76
inT16 width() const
Definition: rect.h:111
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
TBOX bounding_box() const
Definition: blobs.cpp:482
const double kStandardFeatureLength
Definition: intfx.h:46
int push_back(T object)
EDGEPT * next
Definition: blobs.h:169
inT16 bottom() const
Definition: rect.h:61
TESSLINE * next
Definition: blobs.h:258
void truncate(int size)
bool IsHidden() const
Definition: blobs.h:153
int size() const
Definition: genericvector.h:72
TESSLINE * outlines
Definition: blobs.h:377
inT16 top() const
Definition: rect.h:54
EDGEPT * prev
Definition: blobs.h:170
C_OUTLINE * src_outline
Definition: blobs.h:171

◆ ExtractIntCNFeatures()

FEATURE_SET tesseract::Classify::ExtractIntCNFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Integer character-normalized features for blob.
Note
Exceptions: none
History: 8/8/2011, rays, Created.

Definition at line 230 of file picofeat.cpp.

231  {
232  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
235  blob, false, &local_fx_info, &bl_features);
236  if (sample == NULL) return NULL;
237 
238  int num_features = sample->num_features();
239  const INT_FEATURE_STRUCT* features = sample->features();
240  FEATURE_SET feature_set = NewFeatureSet(num_features);
241  for (int f = 0; f < num_features; ++f) {
242  FEATURE feature = NewFeature(&IntFeatDesc);
243 
244  feature->Params[IntX] = features[f].X;
245  feature->Params[IntY] = features[f].Y;
246  feature->Params[IntDir] = features[f].Theta;
247  AddFeature(feature_set, feature);
248  }
249  delete sample;
250 
251  return feature_set;
252 } /* ExtractIntCNFeatures */
const FEATURE_DESC_STRUCT IntFeatDesc
Definition: picofeat.h:29
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
Definition: picofeat.h:30
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:44
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:94
Definition: cluster.h:32
FEATURE_SET NewFeatureSet(int NumFeatures)

◆ ExtractIntGeoFeatures()

FEATURE_SET tesseract::Classify::ExtractIntGeoFeatures ( const TBLOB blob,
const INT_FX_RESULT_STRUCT fx_info 
)
Parameters
blob: blob to extract features from
fx_info
Returns
Geometric (top/bottom/width) features for blob.
Note
Exceptions: none
History: 8/8/2011, rays, Created.

Definition at line 262 of file picofeat.cpp.

263  {
264  INT_FX_RESULT_STRUCT local_fx_info(fx_info);
267  blob, false, &local_fx_info, &bl_features);
268  if (sample == NULL) return NULL;
269 
270  FEATURE_SET feature_set = NewFeatureSet(1);
271  FEATURE feature = NewFeature(&IntFeatDesc);
272 
273  feature->Params[GeoBottom] = sample->geo_feature(GeoBottom);
274  feature->Params[GeoTop] = sample->geo_feature(GeoTop);
275  feature->Params[GeoWidth] = sample->geo_feature(GeoWidth);
276  AddFeature(feature_set, feature);
277  delete sample;
278 
279  return feature_set;
280 } /* ExtractIntGeoFeatures */
const FEATURE_DESC_STRUCT IntFeatDesc
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
BOOL8 AddFeature(FEATURE_SET FeatureSet, FEATURE Feature)
Definition: ocrfeatures.cpp:44
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:94
Definition: cluster.h:32
FEATURE_SET NewFeatureSet(int NumFeatures)

◆ ExtractOutlineFeatures()

FEATURE_SET tesseract::Classify::ExtractOutlineFeatures ( TBLOB Blob)

Convert each segment in the outline to a feature and return the features.

Parameters
Blob: blob to extract outline-features from
Returns
Outline-features for Blob.
Note
Globals: none
Exceptions: none
History:
  • 11/13/90, DSJ, Created.
  • 05/24/91, DSJ, Updated for either char or baseline normalize.

Definition at line 47 of file outfeat.cpp.

47  {
48  LIST Outlines;
49  LIST RemainingOutlines;
50  MFOUTLINE Outline;
51  FEATURE_SET FeatureSet;
52  FLOAT32 XScale, YScale;
53 
54  FeatureSet = NewFeatureSet (MAX_OUTLINE_FEATURES);
55  if (Blob == NULL)
56  return (FeatureSet);
57 
58  Outlines = ConvertBlob (Blob);
59 
60  NormalizeOutlines(Outlines, &XScale, &YScale);
61  RemainingOutlines = Outlines;
62  iterate(RemainingOutlines) {
63  Outline = (MFOUTLINE) first_node (RemainingOutlines);
64  ConvertToOutlineFeatures(Outline, FeatureSet);
65  }
67  NormalizeOutlineX(FeatureSet);
68  FreeOutlines(Outlines);
69  return (FeatureSet);
70 } /* ExtractOutlineFeatures */
#define first_node(l)
Definition: oldlist.h:139
void ConvertToOutlineFeatures(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: outfeat.cpp:122
void NormalizeOutlineX(FEATURE_SET FeatureSet)
Definition: outfeat.cpp:163
LIST MFOUTLINE
Definition: mfoutline.h:33
float FLOAT32
Definition: host.h:44
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:179
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:40
#define iterate(l)
Definition: oldlist.h:159
FEATURE_SET NewFeatureSet(int NumFeatures)
#define MAX_OUTLINE_FEATURES
Definition: outfeat.h:35
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:301

◆ ExtractPicoFeatures()

FEATURE_SET tesseract::Classify::ExtractPicoFeatures ( TBLOB Blob)

Operation: Dummy for now.

Globals:

  • classify_norm_method normalization method currently specified
    Parameters
    Blob: blob to extract pico-features from
    Returns
    Pico-features for Blob.
    Note
    Exceptions: none
    History: 9/4/90, DSJ, Created.

Definition at line 67 of file picofeat.cpp.

67  {
68  LIST Outlines;
69  LIST RemainingOutlines;
70  MFOUTLINE Outline;
71  FEATURE_SET FeatureSet;
72  FLOAT32 XScale, YScale;
73 
74  FeatureSet = NewFeatureSet(MAX_PICO_FEATURES);
75  Outlines = ConvertBlob(Blob);
76  NormalizeOutlines(Outlines, &XScale, &YScale);
77  RemainingOutlines = Outlines;
78  iterate(RemainingOutlines) {
79  Outline = (MFOUTLINE) first_node (RemainingOutlines);
80  ConvertToPicoFeatures2(Outline, FeatureSet);
81  }
83  NormalizePicoX(FeatureSet);
84  FreeOutlines(Outlines);
85  return (FeatureSet);
86 
87 } /* ExtractPicoFeatures */
#define first_node(l)
Definition: oldlist.h:139
void ConvertToPicoFeatures2(MFOUTLINE Outline, FEATURE_SET FeatureSet)
Definition: picofeat.cpp:163
void NormalizePicoX(FEATURE_SET FeatureSet)
Definition: picofeat.cpp:204
LIST MFOUTLINE
Definition: mfoutline.h:33
float FLOAT32
Definition: host.h:44
void FreeOutlines(LIST Outlines)
Definition: mfoutline.cpp:179
LIST ConvertBlob(TBLOB *blob)
Definition: mfoutline.cpp:40
#define iterate(l)
Definition: oldlist.h:159
FEATURE_SET NewFeatureSet(int NumFeatures)
void NormalizeOutlines(LIST Outlines, FLOAT32 *XScale, FLOAT32 *YScale)
Definition: mfoutline.cpp:301
#define MAX_PICO_FEATURES
Definition: picofeat.h:47

◆ FreeNormProtos()

void tesseract::Classify::FreeNormProtos ( )

Definition at line 162 of file normmatch.cpp.

162  {
163  if (NormProtos != NULL) {
164  for (int i = 0; i < NormProtos->NumProtos; i++)
168  Efree(NormProtos);
169  NormProtos = NULL;
170  }
171 }
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
LIST * Protos
Definition: normmatch.cpp:42
void Efree(void *ptr)
Definition: emalloc.cpp:79
NORM_PROTOS * NormProtos
Definition: classify.h:486
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:574

◆ get_fontinfo_table() [1/2]

UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( )
inline

Definition at line 345 of file classify.h.

345  {
346  return fontinfo_table_;
347  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488

◆ get_fontinfo_table() [2/2]

const UnicityTable<FontInfo>& tesseract::Classify::get_fontinfo_table ( ) const
inline

Definition at line 348 of file classify.h.

348  {
349  return fontinfo_table_;
350  }
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488

◆ get_fontset_table()

UnicityTable<FontSet>& tesseract::Classify::get_fontset_table ( )
inline

Definition at line 351 of file classify.h.

351  {
352  return fontset_table_;
353  }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496

◆ GetAdaptiveFeatures()

int tesseract::Classify::GetAdaptiveFeatures ( TBLOB * Blob,
INT_FEATURE_ARRAY  IntFeatures,
FEATURE_SET * FloatFeatures 
)

This routine sets up the feature extractor to extract baseline normalized pico-features.

The extracted pico-features are converted to integer form and placed in IntFeatures. The original floating-pt. features are returned in FloatFeatures.

Globals: none

Parameters
Blob: blob to extract features from
[out] IntFeatures: array to fill with integer features
[out] FloatFeatures: place to return actual floating-pt features
Returns
Number of pico-features returned (0 if an error occurred)
Note
Exceptions: none
History: Tue Mar 12 17:55:18 1991, DSJ, Created.

Definition at line 812 of file adaptmatch.cpp.

814  {
815  FEATURE_SET Features;
816  int NumFeatures;
817 
818  classify_norm_method.set_value(baseline);
819  Features = ExtractPicoFeatures(Blob);
820 
821  NumFeatures = Features->NumFeatures;
822  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
823  FreeFeatureSet(Features);
824  return 0;
825  }
826 
827  ComputeIntFeatures(Features, IntFeatures);
828  *FloatFeatures = Features;
829 
830  return NumFeatures;
831 } /* GetAdaptiveFeatures */
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:67
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:100
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:77

◆ GetAmbiguities()

UNICHAR_ID * tesseract::Classify::GetAmbiguities ( TBLOB * Blob,
CLASS_ID  CorrectClass 
)

This routine matches blob to the built-in templates to find out if there are any classes other than the correct class which are potential ambiguities.

Parameters
Blob: blob to get classification ambiguities for
CorrectClass: correct class for Blob

Globals:

  • CurrentRatings used by qsort compare routine
  • PreTrainedTemplates built-in templates
Returns
String containing all possible ambiguous classes.
Note
Exceptions: none
History: Fri Mar 15 08:08:22 1991, DSJ, Created.

Definition at line 1640 of file adaptmatch.cpp.

1641  {
1642  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1643  UNICHAR_ID *Ambiguities;
1644  int i;
1645 
1646  Results->Initialize();
1647  INT_FX_RESULT_STRUCT fx_info;
1649  TrainingSample* sample =
1651  &bl_features);
1652  if (sample == NULL) {
1653  delete Results;
1654  return NULL;
1655  }
1656 
1657  CharNormClassifier(Blob, *sample, Results);
1658  delete sample;
1659  RemoveBadMatches(Results);
1661 
1662  /* copy the class id's into an string of ambiguities - don't copy if
1663  the correct class is the only class id matched */
1664  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1665  if (Results->match.size() > 1 ||
1666  (Results->match.size() == 1 &&
1667  Results->match[0].unichar_id != CorrectClass)) {
1668  for (i = 0; i < Results->match.size(); i++)
1669  Ambiguities[i] = Results->match[i].unichar_id;
1670  Ambiguities[i] = -1;
1671  } else {
1672  Ambiguities[0] = -1;
1673  }
1674 
1675  delete Results;
1676  return Ambiguities;
1677 } /* GetAmbiguities */
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
void RemoveBadMatches(ADAPT_RESULTS *Results)
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
bool classify_nonlinear_norm
Definition: classify.h:416
void Initialize()
Definition: adaptmatch.cpp:93
int size() const
Definition: genericvector.h:72
Definition: cluster.h:32
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
int UNICHAR_ID
Definition: unichar.h:33

◆ GetCharNormFeature()

int tesseract::Classify::GetCharNormFeature ( const INT_FX_RESULT_STRUCT & fx_info,
INT_TEMPLATES  templates,
uinT8 * pruner_norm_array,
uinT8 * char_norm_array 
)

This routine calls the integer (Hardware) feature extractor if it has not been called before for this blob.

The results from the feature extractor are placed into globals so that they can be used in other routines without re-extracting the features.

It then copies the char norm features into the IntFeatures array provided by the caller.

Parameters
templates: used to compute char norm adjustments
pruner_norm_array: Array of factors from blob normalization process
char_norm_array: array to fill with dummy char norm adjustments
fx_info
Globals:
Returns
Number of features extracted or 0 if an error occurred.
Note
Exceptions: none
History: Tue May 28 10:40:52 1991, DSJ, Created.

Definition at line 1728 of file adaptmatch.cpp.

1731  {
1732  FEATURE norm_feature = NewFeature(&CharNormDesc);
1733  float baseline = kBlnBaselineOffset;
1734  float scale = MF_SCALE_FACTOR;
1735  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1736  norm_feature->Params[CharNormLength] =
1737  fx_info.Length * scale / LENGTH_COMPRESSION;
1738  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1739  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1740  // Deletes norm_feature.
1741  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1742  pruner_norm_array);
1743  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1744 } /* GetCharNormFeature */
const int kBlnBaselineOffset
Definition: normalis.h:29
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
int IntCastRounded(double x)
Definition: helpers.h:172
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
const double kStandardFeatureLength
Definition: intfx.h:46
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:94
const FEATURE_DESC_STRUCT CharNormDesc

◆ GetClassToDebug()

CLASS_ID tesseract::Classify::GetClassToDebug ( const char *  Prompt,
bool *  adaptive_on,
bool *  pretrained_on,
int *  shape_id 
)

This routine prompts the user with Prompt and waits for the user to enter something in the debug window.

Parameters
Prompt: prompt to print while waiting for input from window
adaptive_on
pretrained_on
shape_id
Returns
Character entered in the debug window.
Note
Globals: none
Exceptions: none
History: Thu Mar 21 16:55:13 1991, DSJ, Created.

Definition at line 1391 of file intproto.cpp.

1392  {
1393  tprintf("%s\n", Prompt);
1394  SVEvent* ev;
1395  SVEventType ev_type;
1396  int unichar_id = INVALID_UNICHAR_ID;
1397  // Wait until a click or popup event.
1398  do {
1400  ev_type = ev->type;
1401  if (ev_type == SVET_POPUP) {
1402  if (ev->command_id == IDA_SHAPE_INDEX) {
1403  if (shape_table_ != NULL) {
1404  *shape_id = atoi(ev->parameter);
1405  *adaptive_on = false;
1406  *pretrained_on = true;
1407  if (*shape_id >= 0 && *shape_id < shape_table_->NumShapes()) {
1408  int font_id;
1409  shape_table_->GetFirstUnicharAndFont(*shape_id, &unichar_id,
1410  &font_id);
1411  tprintf("Shape %d, first unichar=%d, font=%d\n",
1412  *shape_id, unichar_id, font_id);
1413  return unichar_id;
1414  }
1415  tprintf("Shape index '%s' not found in shape table\n", ev->parameter);
1416  } else {
1417  tprintf("No shape table loaded!\n");
1418  }
1419  } else {
1421  unichar_id = unicharset.unichar_to_id(ev->parameter);
1422  if (ev->command_id == IDA_ADAPTIVE) {
1423  *adaptive_on = true;
1424  *pretrained_on = false;
1425  *shape_id = -1;
1426  } else if (ev->command_id == IDA_STATIC) {
1427  *adaptive_on = false;
1428  *pretrained_on = true;
1429  } else {
1430  *adaptive_on = true;
1431  *pretrained_on = true;
1432  }
1433  if (ev->command_id == IDA_ADAPTIVE || shape_table_ == NULL) {
1434  *shape_id = -1;
1435  return unichar_id;
1436  }
1437  for (int s = 0; s < shape_table_->NumShapes(); ++s) {
1438  if (shape_table_->GetShape(s).ContainsUnichar(unichar_id)) {
1439  tprintf("%s\n", shape_table_->DebugStr(s).string());
1440  }
1441  }
1442  } else {
1443  tprintf("Char class '%s' not found in unicharset",
1444  ev->parameter);
1445  }
1446  }
1447  }
1448  delete ev;
1449  } while (ev_type != SVET_CLICK);
1450  return 0;
1451 } /* GetClassToDebug */
SVEventType
Definition: scrollview.h:45
int NumShapes() const
Definition: shapetable.h:278
SVEventType type
Definition: scrollview.h:64
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
bool ContainsUnichar(int unichar_id) const
Definition: shapetable.cpp:156
const char * string() const
Definition: strngs.cpp:201
char * parameter
Definition: scrollview.h:71
ScrollView * IntMatchWindow
Definition: intproto.cpp:179
SVEvent * AwaitEvent(SVEventType type)
Definition: scrollview.cpp:449
void GetFirstUnicharAndFont(int shape_id, int *unichar_id, int *font_id) const
Definition: shapetable.cpp:414
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
ShapeTable * shape_table_
Definition: classify.h:512
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
UNICHARSET unicharset
Definition: ccutil.h:70
int command_id
Definition: scrollview.h:70
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291

◆ getDict()

Dict& tesseract::Classify::getDict ( )
inline

Definition at line 65 of file classify.h.

65  {
66  return dict_;
67  }

◆ GetFontinfoId()

int tesseract::Classify::GetFontinfoId ( ADAPT_CLASS  Class,
uinT8  ConfigId 
)

Definition at line 190 of file adaptive.cpp.

190  {
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93

◆ InitAdaptedClass()

void tesseract::Classify::InitAdaptedClass ( TBLOB * Blob,
CLASS_ID  ClassId,
int  FontinfoId,
ADAPT_CLASS  Class,
ADAPT_TEMPLATES  Templates 
)

This routine creates a new adapted class and uses Blob as the model for the first config in that class.

Parameters
Blob: blob to model new class after
ClassId: id of the class to be initialized
FontinfoId: font information inferred from pre-trained templates
Class: adapted class to be initialized
Templates: adapted templates to add new class to

Globals:

Note
Exceptions: none
History: Thu Mar 14 12:49:39 1991, DSJ, Created.

Definition at line 717 of file adaptmatch.cpp.

721  {
722  FEATURE_SET Features;
723  int Fid, Pid;
724  FEATURE Feature;
725  int NumFeatures;
726  TEMP_PROTO TempProto;
727  PROTO Proto;
728  INT_CLASS IClass;
730 
731  classify_norm_method.set_value(baseline);
732  Features = ExtractOutlineFeatures(Blob);
733  NumFeatures = Features->NumFeatures;
734  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
735  FreeFeatureSet(Features);
736  return;
737  }
738 
739  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
740  TempConfigFor(Class, 0) = Config;
741 
742  /* this is a kludge to construct cutoffs for adapted templates */
743  if (Templates == AdaptedTemplates)
744  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
745 
746  IClass = ClassForClassId (Templates->Templates, ClassId);
747 
748  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
749  Pid = AddIntProto (IClass);
750  assert (Pid != NO_PROTO);
751 
752  Feature = Features->Features[Fid];
753  TempProto = NewTempProto ();
754  Proto = &(TempProto->Proto);
755 
756  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
757  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
758  instead of the -0.25 to 0.75 used in baseline normalization */
759  Proto->Angle = Feature->Params[OutlineFeatDir];
760  Proto->X = Feature->Params[OutlineFeatX];
761  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
762  Proto->Length = Feature->Params[OutlineFeatLength];
763  FillABC(Proto);
764 
765  TempProto->ProtoId = Pid;
766  SET_BIT (Config->Protos, Pid);
767 
768  ConvertProto(Proto, Pid, IClass);
769  AddProtoToProtoPruner(Proto, Pid, IClass,
771 
772  Class->TempProtos = push (Class->TempProtos, TempProto);
773  }
774  FreeFeatureSet(Features);
775 
776  AddIntConfig(IClass);
777  ConvertConfig (AllProtosOn, 0, IClass);
778 
780  tprintf("Added new class '%s' with class id %d and %d protos.\n",
781  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
783  DisplayAdaptedChar(Blob, IClass);
784  }
785 
786  if (IsEmptyAdaptedClass(Class))
787  (Templates->NumNonEmptyClasses)++;
788 } /* InitAdaptedClass */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
uinT16 ProtoId
Definition: adaptive.h:30
CLUSTERCONFIG Config
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
FLOAT32 Angle
Definition: protos.h:49
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
FLOAT32 X
Definition: protos.h:47
FLOAT32 Length
Definition: protos.h:50
PROTO_STRUCT Proto
Definition: adaptive.h:32
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
void FillABC(PROTO Proto)
Definition: protos.cpp:198
#define SET_BIT(array, bit)
Definition: bitvec.h:57
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:47
BIT_VECTOR AllProtosOn
Definition: classify.h:480
#define NO_PROTO
Definition: matchdefs.h:42
INT_TEMPLATES Templates
Definition: adaptive.h:77
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
FLOAT32 Y
Definition: protos.h:48
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:979
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
FEATURE Features[1]
Definition: ocrfeatures.h:72
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:77
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75

◆ InitAdaptiveClassifier()

void tesseract::Classify::InitAdaptiveClassifier ( bool  load_pre_trained_templates)

This routine reads in the training information needed by the adaptive classifier and saves it into global variables.

Parameters
load_pre_trained_templates: Indicates whether the pre-trained templates (inttemp, normproto and pffmtable components) should be loaded. Should only be set to true if the necessary classifier components are present in the [lang].traineddata file.

Globals:

  • BuiltInTemplatesFile: file to get built-in temps from
  • BuiltInCutoffsFile: file to get avg. feat per class from
  • classify_use_pre_adapted_templates: enables use of pre-adapted templates

Note
History: Mon Mar 11 12:49:34 1991, DSJ, Created.

Definition at line 527 of file adaptmatch.cpp.

527  {
529  return;
530  if (AllProtosOn != NULL)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 &&
536  load_pre_trained_templates) {
540  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n");
541 
543  shape_table_ = new ShapeTable(unicharset);
546  tprintf("Error loading shape table!\n");
547  delete shape_table_;
548  shape_table_ = NULL;
549  } else if (tessdata_manager.DebugLevel() > 0) {
550  tprintf("Successfully loaded shape table!\n");
551  }
552  }
553 
558  CharNormCutoffs);
559  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n");
560 
562  NormProtos =
565  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n");
566  static_classifier_ = new TessClassifier(false, this);
567  }
568 
570  InitIntegerFX();
571 
579 
580  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
581  BaselineCutoffs[i] = 0;
582  }
583 
585  FILE *File;
586  STRING Filename;
587 
588  Filename = imagefile;
589  Filename += ADAPT_TEMPLATE_SUFFIX;
590  File = fopen(Filename.string(), "rb");
591  if (File == NULL) {
593  } else {
594  cprintf("\nReading pre-adapted templates from %s ...\n",
595  Filename.string());
596  fflush(stdout);
598  cprintf("\n");
599  fclose(File);
601 
602  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
603  BaselineCutoffs[i] = CharNormCutoffs[i];
604  }
605  }
606  } else {
607  if (AdaptedTemplates != NULL)
610  }
611 } /* InitAdaptiveClassifier */
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
inT64 GetEndOffset(TessdataType tessdata_type) const
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:761
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
bool DeSerialize(bool swap, FILE *fp)
Definition: shapetable.cpp:256
bool classify_enable_adaptive_matcher
Definition: classify.h:409
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:679
inT32 length() const
Definition: strngs.cpp:196
STRING imagefile
Definition: ccutil.h:72
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
BIT_VECTOR TempProtoMask
Definition: classify.h:483
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
Definition: cutoffs.cpp:52
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
#define set_all_bits(array, length)
Definition: bitvec.h:41
#define MAX_NUM_PROTOS
Definition: intproto.h:47
bool classify_use_pre_adapted_templates
Definition: classify.h:411
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
TessdataManager tessdata_manager
Definition: ccutil.h:69
const char * string() const
Definition: strngs.cpp:201
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
Definition: normmatch.cpp:245
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:369
BIT_VECTOR AllProtosOn
Definition: classify.h:480
INT_TEMPLATES Templates
Definition: adaptive.h:77
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
#define tprintf(...)
Definition: tprintf.h:31
Definition: strngs.h:44
void InitIntegerFX()
Definition: intfx.cpp:55
ShapeTable * shape_table_
Definition: classify.h:512
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456
STRING language_data_path_prefix
Definition: ccutil.h:68
bool SeekToStart(TessdataType tessdata_type)
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
UNICHARSET unicharset
Definition: ccutil.h:70
#define zero_all_bits(array, length)
Definition: bitvec.h:33
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
#define ASSERT_HOST(x)
Definition: errcode.h:84
NORM_PROTOS * NormProtos
Definition: classify.h:486
IntegerMatcher im_
Definition: classify.h:503

◆ LargeSpeckle()

bool tesseract::Classify::LargeSpeckle ( const TBLOB & blob)

Definition at line 235 of file classify.cpp.

235  {
236  double speckle_size = kBlnXHeight * speckle_large_max_size;
237  TBOX bbox = blob.bounding_box();
238  return bbox.width() < speckle_size && bbox.height() < speckle_size;
239 }
inT16 width() const
Definition: rect.h:111
TBOX bounding_box() const
Definition: blobs.cpp:482
const int kBlnXHeight
Definition: normalis.h:28
inT16 height() const
Definition: rect.h:104
double speckle_large_max_size
Definition: classify.h:501
Definition: rect.h:30

◆ LearnBlob()

void tesseract::Classify::LearnBlob ( const STRING & fontname,
TBLOB * Blob,
const DENORM & cn_denorm,
const INT_FX_RESULT_STRUCT & fx_info,
const char *  blob_text 
)

Definition at line 69 of file blobclass.cpp.

72  {
74  CharDesc->FeatureSets[0] = ExtractMicros(blob, cn_denorm);
75  CharDesc->FeatureSets[1] = ExtractCharNormFeatures(fx_info);
76  CharDesc->FeatureSets[2] = ExtractIntCNFeatures(*blob, fx_info);
77  CharDesc->FeatureSets[3] = ExtractIntGeoFeatures(*blob, fx_info);
78 
79  if (ValidCharDescription(feature_defs_, CharDesc)) {
80  // Label the features with a class name and font name.
81  tr_file_data_ += "\n";
82  tr_file_data_ += fontname;
83  tr_file_data_ += " ";
84  tr_file_data_ += blob_text;
85  tr_file_data_ += "\n";
86 
87  // write micro-features to file and clean up
88  WriteCharDescription(feature_defs_, CharDesc, &tr_file_data_);
89  } else {
90  tprintf("Blob learned was invalid!\n");
91  }
92  FreeCharDescription(CharDesc);
93 } // LearnBlob
FEATURE_SET ExtractCharNormFeatures(const INT_FX_RESULT_STRUCT &fx_info)
Definition: normfeat.cpp:61
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
FEATURE_DEFS_STRUCT feature_defs_
Definition: classify.h:507
CHAR_DESC NewCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs)
Definition: featdefs.cpp:164
FEATURE_SET ExtractIntCNFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:230
FEATURE_SET ExtractIntGeoFeatures(const TBLOB &blob, const INT_FX_RESULT_STRUCT &fx_info)
Definition: picofeat.cpp:262
#define tprintf(...)
Definition: tprintf.h:31
bool ValidCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc)
Definition: featdefs.cpp:219
void WriteCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, CHAR_DESC CharDesc, STRING *str)
Definition: featdefs.cpp:197
FEATURE_SET ExtractMicros(TBLOB *Blob, const DENORM &cn_denorm)
Definition: mf.cpp:45
void FreeCharDescription(CHAR_DESC CharDesc)
Definition: featdefs.cpp:141

◆ LearnPieces()

void tesseract::Classify::LearnPieces ( const char *  fontname,
int  start,
int  length,
float  threshold,
CharSegmentationType  segmentation,
const char *  correct_text,
WERD_RES word 
)

Definition at line 368 of file adaptmatch.cpp.

370  {
371  // TODO(daria) Remove/modify this if/when we want
372  // to train and/or adapt to n-grams.
373  if (segmentation != CST_WHOLE &&
374  (segmentation != CST_FRAGMENT || disable_character_fragments))
375  return;
376 
377  if (length > 1) {
378  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
379  start + length - 1);
380  }
381  TBLOB* blob = word->chopped_word->blobs[start];
382  // Rotate the blob if needed for classification.
383  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
384  if (rotated_blob == NULL)
385  rotated_blob = blob;
386 
387  #ifndef GRAPHICS_DISABLED
388  // Draw debug windows showing the blob that is being learned if needed.
389  if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) {
390  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
391  word->chopped_word->bounding_box());
392  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
393  learn_debug_win_->Update();
394  window_wait(learn_debug_win_);
395  }
396  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
397  ASSERT_HOST(learn_fragments_debug_win_ != NULL); // set up in LearnWord
398  blob->plot(learn_fragments_debug_win_,
400  learn_fragments_debug_win_->Update();
401  }
402  #endif // GRAPHICS_DISABLED
403 
404  if (fontname != NULL) {
405  classify_norm_method.set_value(character); // force char norm spc 30/11/93
406  tess_bn_matching.set_value(false); // turn it off
407  tess_cn_matching.set_value(false);
408  DENORM bl_denorm, cn_denorm;
409  INT_FX_RESULT_STRUCT fx_info;
411  &bl_denorm, &cn_denorm, &fx_info);
412  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
413  } else if (unicharset.contains_unichar(correct_text)) {
414  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
415  int font_id = word->fontinfo != NULL
416  ? fontinfo_table_.get_id(*word->fontinfo)
417  : 0;
419  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
420  unicharset.id_to_unichar(class_id), threshold, font_id);
421  // If filename is not NULL we are doing recognition
422  // (as opposed to training), so we must have already set word fonts.
423  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
424  if (BackupAdaptedTemplates != NULL) {
425  // Adapt the backup templates too. They will be used if the primary gets
426  // too full.
427  AdaptToChar(rotated_blob, class_id, font_id, threshold,
429  }
430  } else if (classify_debug_level >= 1) {
431  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
432  }
433  if (rotated_blob != blob) {
434  delete rotated_blob;
435  }
436 
437  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
438  start + length - 1);
439 } // LearnPieces.
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
static void Update()
Definition: scrollview.cpp:715
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
char * classify_learn_debug_str
Definition: classify.h:459
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
bool classify_debug_character_fragments
Definition: classify.h:455
const FontInfo * fontinfo
Definition: pageres.h:288
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:524
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:69
TWERD * chopped_word
Definition: pageres.h:201
TBOX bounding_box() const
Definition: blobs.cpp:881
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:363
bool classify_nonlinear_norm
Definition: classify.h:416
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
bool disable_character_fragments
Definition: classify.h:450
Definition: blobs.h:261
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:886
UNICHARSET unicharset
Definition: ccutil.h:70
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
#define ASSERT_HOST(x)
Definition: errcode.h:84
int UNICHAR_ID
Definition: unichar.h:33

◆ LearnWord()

void tesseract::Classify::LearnWord ( const char *  fontname,
WERD_RES * word 
)

Definition at line 244 of file adaptmatch.cpp.

244  {
245  int word_len = word->correct_text.size();
246  if (word_len == 0) return;
247 
248  float* thresholds = NULL;
249  if (fontname == NULL) {
250  // Adaption mode.
251  if (!EnableLearning || word->best_choice == NULL)
252  return; // Can't or won't adapt.
253 
255  tprintf("\n\nAdapting to word = %s\n",
256  word->best_choice->debug_string().string());
257  thresholds = new float[word_len];
261  matcher_rating_margin, thresholds);
262  }
263  int start_blob = 0;
264 
265  #ifndef GRAPHICS_DISABLED
267  if (learn_fragmented_word_debug_win_ != NULL) {
268  window_wait(learn_fragmented_word_debug_win_);
269  }
270  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
271  word->chopped_word->bounding_box());
272  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
273  word->chopped_word->bounding_box());
274  word->chopped_word->plot(learn_fragmented_word_debug_win_);
276  }
277  #endif // GRAPHICS_DISABLED
278 
279  for (int ch = 0; ch < word_len; ++ch) {
281  tprintf("\nLearning %s\n", word->correct_text[ch].string());
282  }
283  if (word->correct_text[ch].length() > 0) {
284  float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
285 
286  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
287  CST_WHOLE, word->correct_text[ch].string(), word);
288 
289  if (word->best_state[ch] > 1 && !disable_character_fragments) {
290  // Check that the character breaks into meaningful fragments
291  // that each match a whole character with at least
292  // classify_character_fragments_garbage_certainty_threshold
293  bool garbage = false;
294  int frag;
295  for (frag = 0; frag < word->best_state[ch]; ++frag) {
296  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
298  garbage |= LooksLikeGarbage(frag_blob);
299  }
300  }
301  // Learn the fragments.
302  if (!garbage) {
303  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
304  word->best_state[ch]);
305  if (pieces_all_natural || !prioritize_division) {
306  for (frag = 0; frag < word->best_state[ch]; ++frag) {
307  GenericVector<STRING> tokens;
308  word->correct_text[ch].split(' ', &tokens);
309 
310  tokens[0] = CHAR_FRAGMENT::to_string(
311  tokens[0].string(), frag, word->best_state[ch],
312  pieces_all_natural);
313 
314  STRING full_string;
315  for (int i = 0; i < tokens.size(); i++) {
316  full_string += tokens[i];
317  if (i != tokens.size() - 1)
318  full_string += ' ';
319  }
320  LearnPieces(fontname, start_blob + frag, 1, threshold,
321  CST_FRAGMENT, full_string.string(), word);
322  }
323  }
324  }
325  }
326 
327  // TODO(rays): re-enable this part of the code when we switch to the
328  // new classifier that needs to see examples of garbage.
329  /*
330  if (word->best_state[ch] > 1) {
331  // If the next blob is good, make junk with the rightmost fragment.
332  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
333  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
334  word->best_state[ch + 1] + 1,
335  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
336  }
337  // If the previous blob is good, make junk with the leftmost fragment.
338  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
339  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
340  word->best_state[ch - 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  }
344  // If the next blob is good, make a join with it.
345  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
346  STRING joined_text = word->correct_text[ch];
347  joined_text += word->correct_text[ch + 1];
348  LearnPieces(fontname, start_blob,
349  word->best_state[ch] + word->best_state[ch + 1],
350  threshold, CST_NGRAM, joined_text.string(), word);
351  }
352  */
353  }
354  start_blob += word->best_state[ch];
355  }
356  delete [] thresholds;
357 } // LearnWord.
double matcher_rating_margin
Definition: classify.h:424
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1072
double matcher_good_threshold
Definition: classify.h:420
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
static void Update()
Definition: scrollview.cpp:715
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:368
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
bool prioritize_division
Definition: classify.h:387
WERD_CHOICE * best_choice
Definition: pageres.h:219
double matcher_perfect_threshold
Definition: classify.h:422
bool LooksLikeGarbage(TBLOB *blob)
bool classify_debug_character_fragments
Definition: classify.h:455
const char * string() const
Definition: strngs.cpp:201
void plot(ScrollView *window)
Definition: blobs.cpp:918
TWERD * chopped_word
Definition: pageres.h:201
TBOX bounding_box() const
Definition: blobs.cpp:881
GenericVector< STRING > correct_text
Definition: pageres.h:259
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
Definition: strngs.h:44
int size() const
Definition: genericvector.h:72
bool disable_character_fragments
Definition: classify.h:450
Definition: blobs.h:261
STRING to_string() const
Definition: unicharset.h:73
int length() const
Definition: genericvector.h:79
GenericVector< int > best_state
Definition: pageres.h:255
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
const STRING debug_string() const
Definition: ratngs.h:503
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:553
double certainty_scale
Definition: classify.h:437

◆ LooksLikeGarbage()

bool tesseract::Classify::LooksLikeGarbage ( TBLOB * blob)

Definition at line 1681 of file adaptmatch.cpp.

// Classifies `blob` with AdaptiveClassifier and judges it by the first rating
// whose unichar is not a character fragment. Returns true when the ratings
// contain no such whole-character entry.
// NOTE(review): this listing omits source lines 1685-1686 and 1698, so the
// opening of the debug-print block and the right-hand side of the final
// comparison are not visible here.
1681  {
1682  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
1683  AdaptiveClassifier(blob, ratings);
1684  BLOB_CHOICE_IT ratings_it(ratings);
1687  print_ratings_list("======================\nLooksLikeGarbage() got ",
1688  ratings, unicharset);
1689  }
      // Walk the ratings in list order, skipping fragment entries.
1690  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1691  ratings_it.forward()) {
1692  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) {
1693  continue;
1694  }
      // Copy the certainty out before the list is deleted; the function
      // returns on this first non-fragment entry.
1695  float certainty = ratings_it.data()->certainty();
1696  delete ratings;
1697  return certainty <
1699  }
1700  delete ratings;
1701  return true; // no whole characters in ratings
1702 }
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
bool classify_debug_character_fragments
Definition: classify.h:455
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819
Dict & getDict()
Definition: classify.h:65
UNICHARSET unicharset
Definition: ccutil.h:70
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682

◆ MakeNewTemporaryConfig()

int tesseract::Classify::MakeNewTemporaryConfig ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  FontinfoId,
int  NumFeatures,
INT_FEATURE_ARRAY  Features,
FEATURE_SET  FloatFeatures 
)
Parameters
Templates: adapted templates to add new config to
ClassId: class id to associate with new config
FontinfoId: font information inferred from pre-trained templates
NumFeatures: number of features in Features
Features: features describing model for new config
FloatFeatures: floating-pt representation of features
Returns
The id of the new config created, a negative integer in case of error.
Note
Exceptions: none
History: Fri Mar 15 08:49:46 1991, DSJ, Created.

Definition at line 1792 of file adaptmatch.cpp.

// Builds a new temporary config for ClassId. Existing protos reported by
// FindGoodProtos are reused, temp protos are made for the features reported
// by FindBadFeatures, and the resulting proto mask is copied into the new
// config. Returns the new config id, or -1 when the class already has
// MAX_NUM_CONFIGS configs or MakeNewTempProtos returns NO_PROTO.
// NOTE(review): this listing omits source lines 1808, 1812, 1814, 1821, 1841,
// 1848 and 1859, so some declarations, conditions and arguments are not
// visible here.
1797  {
1798  INT_CLASS IClass;
1799  ADAPT_CLASS Class;
1800  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1801  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1802  int NumOldProtos;
1803  int NumBadFeatures;
1804  int MaxProtoId, OldMaxProtoId;
1805  int BlobLength = 0;
1806  int MaskSize;
1807  int ConfigId;
1809  int i;
1810  int debug_level = NO_DEBUG;
1811 
1813  debug_level =
1815 
1816  IClass = ClassForClassId(Templates->Templates, ClassId);
1817  Class = Templates->Class[ClassId];
1818 
      // Refuse to exceed the per-class config limit; this is counted as a
      // failed adaptation.
1819  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1820  ++NumAdaptationsFailed;
1822  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1823  return -1;
1824  }
1825 
      // Remember the highest proto id before any new protos are added; it is
      // used below to report how many protos are new.
1826  OldMaxProtoId = IClass->NumProtos - 1;
1827 
      // Collect proto ids into OldProtos, matching with all protos enabled
      // and all configs disabled.
1828  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1829  BlobLength, NumFeatures, Features,
1830  OldProtos, classify_adapt_proto_threshold,
1831  debug_level);
1832 
      // Reset TempProtoMask and set only the bits of the protos found above.
1833  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1834  zero_all_bits(TempProtoMask, MaskSize);
1835  for (i = 0; i < NumOldProtos; i++)
1836  SET_BIT(TempProtoMask, OldProtos[i]);
1837 
      // Collect feature ids into BadFeatures, matching against the protos
      // selected in TempProtoMask.
1838  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1839  BlobLength, NumFeatures, Features,
1840  BadFeatures,
1842  debug_level);
1843 
      // Turn the bad features into new temp protos.
1844  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1845  IClass, Class, TempProtoMask);
1846  if (MaxProtoId == NO_PROTO) {
1847  ++NumAdaptationsFailed;
1849  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1850  return -1;
1851  }
1852 
      // Register the new config in the integer class, attach a temp config to
      // the adapted class, and copy the proto mask into it.
1853  ConfigId = AddIntConfig(IClass);
1854  ConvertConfig(TempProtoMask, ConfigId, IClass);
1855  Config = NewTempConfig(MaxProtoId, FontinfoId);
1856  TempConfigFor(Class, ConfigId) = Config;
1857  copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize);
1858 
1860  cprintf("Making new temp config %d fontinfo id %d"
1861  " using %d old and %d new protos.\n",
1862  ConfigId, Config->FontinfoId,
1863  NumOldProtos, MaxProtoId - OldMaxProtoId);
1864 
1865  return ConfigId;
1866 } /* MakeNewTemporaryConfig */
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:557
#define copy_all_bits(source, dest, length)
Definition: bitvec.h:49
#define NO_DEBUG
Definition: adaptmatch.cpp:70
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT16 NumProtos
Definition: intproto.h:108
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
#define PRINT_PROTO_MATCHES
Definition: intproto.h:194
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
CLUSTERCONFIG Config
inT16 PROTO_ID
Definition: matchdefs.h:41
BIT_VECTOR TempProtoMask
Definition: classify.h:483
int classify_adapt_feature_threshold
Definition: classify.h:447
#define MAX_NUM_PROTOS
Definition: intproto.h:47
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define SET_BIT(array, bit)
Definition: bitvec.h:57
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
uinT8 FEATURE_ID
Definition: matchdefs.h:47
BIT_VECTOR AllProtosOn
Definition: classify.h:480
#define NO_PROTO
Definition: matchdefs.h:42
INT_TEMPLATES Templates
Definition: adaptive.h:77
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_proto_threshold
Definition: classify.h:445
int classify_learning_debug_level
Definition: classify.h:419
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:190
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:627
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:193
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define zero_all_bits(array, length)
Definition: bitvec.h:33
uinT8 NumConfigs
Definition: intproto.h:110
IntegerMatcher im_
Definition: classify.h:503
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ MakeNewTempProtos()

PROTO_ID tesseract::Classify::MakeNewTempProtos ( FEATURE_SET  Features,
int  NumBadFeat,
FEATURE_ID  BadFeat[],
INT_CLASS  IClass,
ADAPT_CLASS  Class,
BIT_VECTOR  TempProtoMask 
)

This routine finds sets of sequential bad features that all have the same angle and converts each set into a new temporary proto. The temp proto is added to the proto pruner for IClass, pushed onto the list of temp protos in Class, and added to TempProtoMask.

Parameters
Features: floating-pt features describing new character
NumBadFeat: number of bad features to turn into protos
BadFeat: feature id's of bad features
IClass: integer class templates to add new protos to
Class: adapted class templates to add new protos to
TempProtoMask: proto mask to add new protos to

Globals: none

Returns
Max proto id in class after all protos have been added. Exceptions: none History: Fri Mar 15 11:39:38 1991, DSJ, Created.

Definition at line 1889 of file adaptmatch.cpp.

// Splits the BadFeat array into runs of consecutive entries and converts each
// run into one temporary proto. Returns IClass->NumProtos - 1 after all runs
// are processed, or NO_PROTO as soon as AddIntProto returns NO_PROTO.
// NOTE(review): this listing omits source line 1958, so the last argument of
// the AddProtoToProtoPruner call is not visible here.
1894  {
1895  FEATURE_ID *ProtoStart;
1896  FEATURE_ID *ProtoEnd;
1897  FEATURE_ID *LastBad;
1898  TEMP_PROTO TempProto;
1899  PROTO Proto;
1900  FEATURE F1, F2;
1901  FLOAT32 X1, X2, Y1, Y2;
1902  FLOAT32 A1, A2, AngleDelta;
1903  FLOAT32 SegmentLength;
1904  PROTO_ID Pid;
1905 
      // Outer loop: ProtoStart is the first feature of a run; the next run
      // starts where the inner loop stopped (ProtoEnd).
1906  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1907  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1908  F1 = Features->Features[*ProtoStart];
1909  X1 = F1->Params[PicoFeatX];
1910  Y1 = F1->Params[PicoFeatY];
1911  A1 = F1->Params[PicoFeatDir];
1912 
      // Inner loop: extend the run while each following feature stays close
      // to the first one. SegmentLength grows by one pico-feature length per
      // feature in the run.
1913  for (ProtoEnd = ProtoStart + 1,
1914  SegmentLength = GetPicoFeatureLength();
1915  ProtoEnd < LastBad;
1916  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1917  F2 = Features->Features[*ProtoEnd];
1918  X2 = F2->Params[PicoFeatX];
1919  Y2 = F2->Params[PicoFeatY];
1920  A2 = F2->Params[PicoFeatDir];
1921 
      // Differences above 0.5 are folded back (presumably the direction
      // wraps around at 1.0 -- TODO confirm the units of PicoFeatDir).
1922  AngleDelta = fabs(A1 - A2);
1923  if (AngleDelta > 0.5)
1924  AngleDelta = 1.0 - AngleDelta;
1925 
      // The run ends when the direction differs too much from the first
      // feature, or the x or y offset from it exceeds the accumulated length.
1926  if (AngleDelta > matcher_clustering_max_angle_delta ||
1927  fabs(X1 - X2) > SegmentLength ||
1928  fabs(Y1 - Y2) > SegmentLength)
1929  break;
1930  }
1931 
      // Re-read the last feature that was accepted into the run.
1932  F2 = Features->Features[*(ProtoEnd - 1)];
1933  X2 = F2->Params[PicoFeatX];
1934  Y2 = F2->Params[PicoFeatY];
1935  A2 = F2->Params[PicoFeatDir];
1936 
      // NOTE(review): on failure this returns immediately; protos added in
      // earlier iterations are not undone here.
1937  Pid = AddIntProto(IClass);
1938  if (Pid == NO_PROTO)
1939  return (NO_PROTO);
1940 
1941  TempProto = NewTempProto();
1942  Proto = &(TempProto->Proto);
1943 
1944  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1945  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1946  instead of the -0.25 to 0.75 used in baseline normalization */
      // The proto takes the first feature's angle and the midpoint between
      // the first and last feature of the run.
1947  Proto->Length = SegmentLength;
1948  Proto->Angle = A1;
1949  Proto->X = (X1 + X2) / 2.0;
1950  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1951  FillABC(Proto);
1952 
1953  TempProto->ProtoId = Pid;
1954  SET_BIT(TempProtoMask, Pid);
1955 
1956  ConvertProto(Proto, Pid, IClass);
1957  AddProtoToProtoPruner(Proto, Pid, IClass,
1959 
1960  Class->TempProtos = push(Class->TempProtos, TempProto);
1961  }
1962  return IClass->NumProtos - 1;
1963 } /* MakeNewTempProtos */
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
uinT16 NumProtos
Definition: intproto.h:108
double matcher_clustering_max_angle_delta
Definition: classify.h:432
uinT16 ProtoId
Definition: adaptive.h:30
inT16 PROTO_ID
Definition: matchdefs.h:41
BIT_VECTOR TempProtoMask
Definition: classify.h:483
FLOAT32 Angle
Definition: protos.h:49
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
FLOAT32 X
Definition: protos.h:47
FLOAT32 Length
Definition: protos.h:50
PROTO_STRUCT Proto
Definition: adaptive.h:32
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
void FillABC(PROTO Proto)
Definition: protos.cpp:198
#define SET_BIT(array, bit)
Definition: bitvec.h:57
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
uinT8 FEATURE_ID
Definition: matchdefs.h:47
float FLOAT32
Definition: host.h:44
#define NO_PROTO
Definition: matchdefs.h:42
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
int classify_learning_debug_level
Definition: classify.h:419
FLOAT32 Y
Definition: protos.h:48
FEATURE Features[1]
Definition: ocrfeatures.h:72
#define GetPicoFeatureLength()
Definition: picofeat.h:59
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75

◆ MakePermanent()

void tesseract::Classify::MakePermanent ( ADAPT_TEMPLATES  Templates,
CLASS_ID  ClassId,
int  ConfigId,
TBLOB * Blob
)
Parameters
Templates: current set of adaptive templates
ClassId: class containing config to be made permanent
ConfigId: config to be made permanent
Blob: current blob being adapted to

Globals: none

Note
Exceptions: none
History: Thu Mar 14 15:54:08 1991, DSJ, Created.

Definition at line 1978 of file adaptmatch.cpp.

// Turns temporary config ConfigId of class ClassId into a permanent config.
// It updates the permanent-config counters, fills a permanent config with the
// ambiguities from GetAmbiguities and the temp config's FontinfoId, passes
// the class's temp proto list through delete_d with MakeTempProtoPerm, and
// stores the permanent config in the class.
// NOTE(review): this listing omits source lines 1983, 1997 and 2008, so the
// declarations of Config/Perm, the start of the allocation statement and the
// statement after delete_d are not visible here.
1981  {
1982  UNICHAR_ID *Ambigs;
1984  ADAPT_CLASS Class;
1985  PROTO_KEY ProtoKey;
1986 
1987  Class = Templates->Class[ClassId];
1988  Config = TempConfigFor(Class, ConfigId);
1989 
      // The first permanent config of a class also increments the count of
      // permanent classes in Templates.
1990  MakeConfigPermanent(Class, ConfigId);
1991  if (Class->NumPermConfigs == 0)
1992  Templates->NumPermClasses++;
1993  Class->NumPermConfigs++;
1994 
1995  // Initialize permanent config.
1996  Ambigs = GetAmbiguities(Blob, ClassId);
1998  "PERM_CONFIG_STRUCT");
1999  Perm->Ambigs = Ambigs;
2000  Perm->FontinfoId = Config->FontinfoId;
2001 
2002  // Free memory associated with temporary config (since ADAPTED_CONFIG
2003  // is a union we need to clean up before we record permanent config).
2004  ProtoKey.Templates = Templates;
2005  ProtoKey.ClassId = ClassId;
2006  ProtoKey.ConfigId = ConfigId;
2007  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
2009 
2010  // Record permanent config.
2011  PermConfigFor(Class, ConfigId) = Perm;
2012 
2013  if (classify_learning_debug_level >= 1) {
2014  tprintf("Making config %d for %s (ClassId %d) permanent:"
2015  " fontinfo id %d, ambiguities '",
2016  ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
2017  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
      // Ambigs is walked until the first negative id.
2018  for (UNICHAR_ID *AmbigsPointer = Ambigs;
2019  *AmbigsPointer >= 0; ++AmbigsPointer)
2020  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2021  tprintf("'.\n");
2022  }
2023 } /* MakePermanent */
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:120
CLUSTERCONFIG Config
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
uinT8 NumPermConfigs
Definition: adaptive.h:65
Dict & getDict()
Definition: classify.h:65
CLASS_ID ClassId
Definition: adaptmatch.cpp:115
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:114
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:96
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:39
int MakeTempProtoPerm(void *item1, void *item2)
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
UNICHAR_ID * Ambigs
Definition: adaptive.h:52
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
int UNICHAR_ID
Definition: unichar.h:33

◆ MasterMatcher()

void tesseract::Classify::MasterMatcher ( INT_TEMPLATES  templates,
inT16  num_features,
const INT_FEATURE_STRUCT * features,
const uinT8 * norm_factors,
ADAPT_CLASS * classes,
int  debug,
int  matcher_multiplier,
const TBOX blob_box,
const GenericVector< CP_RESULT_STRUCT > &  results,
ADAPT_RESULTS * final_results
)

Factored-out calls to IntegerMatcher based on class pruner results. Each integer matcher result is passed, together with final_results, to ExpandShapesAndApplyCorrections; the function itself returns nothing and does not modify results.

Definition at line 1127 of file adaptmatch.cpp.

// Runs im_.Match for every class listed in `results` and hands each match
// result to ExpandShapesAndApplyCorrections together with final_results.
// NOTE(review): this listing omits source line 1152, so the end of the
// im_.Match argument list is not visible here.
1136  {
1137  int top = blob_box.top();
1138  int bottom = blob_box.bottom();
1139  UnicharRating int_result;
1140  for (int c = 0; c < results.size(); c++) {
1141  CLASS_ID class_id = results[c].Class;
      // With no adapted classes supplied, match against all protos and all
      // configs; otherwise use only the class's permanent protos/configs.
1142  BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
1143  : AllProtosOn;
1144  BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
1145  : AllConfigsOn;
1146 
1147  int_result.unichar_id = class_id;
1148  im_.Match(ClassForClassId(templates, class_id),
1149  protos, configs,
1150  num_features, features,
1151  &int_result, classify_adapt_feature_threshold, debug,
      // This local bool shadows the int `debug` parameter for the rest of
      // the loop body.
1153  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1154  ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
1155  results[c].Rating,
1156  final_results->BlobLength,
1157  matcher_multiplier, norm_factors,
1158  &int_result, final_results);
1159  }
1160 }
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
BIT_VECTOR PermProtos
Definition: adaptive.h:68
int classify_adapt_feature_threshold
Definition: classify.h:447
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define ClassForClassId(T, c)
Definition: intproto.h:181
inT16 bottom() const
Definition: rect.h:61
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
bool matcher_debug_separate_windows
Definition: classify.h:458
BIT_VECTOR AllProtosOn
Definition: classify.h:480
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int size() const
Definition: genericvector.h:72
inT16 top() const
Definition: rect.h:54
inT32 BlobLength
Definition: adaptmatch.cpp:83
IntegerMatcher im_
Definition: classify.h:503

◆ NewAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::NewAdaptedTemplates ( bool  InitFromUnicharset)

Allocates memory for adapted templates.

Parameters
InitFromUnicharset: if true, add an empty class for each char in unicharset to the newly created templates
Returns
Ptr to new adapted templates.
Note
Globals: none
Exceptions: none
History: Fri Mar 8 10:15:28 1991, DSJ, Created.

Definition at line 167 of file adaptive.cpp.

// Allocates an ADAPT_TEMPLATES_STRUCT with Emalloc, gives it fresh integer
// templates and zeroed class counters, and clears every Class slot. When
// InitFromUnicharset is true, a new adapted class is added for each index
// below unicharset.size(). Returns the new templates.
167  {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
     // Every slot up to MAX_NUM_CLASSES is set to NULL first, so slots at or
     // beyond unicharset.size() stay NULL.
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:49
int size() const
Definition: unicharset.h:297
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:113
INT_TEMPLATES Templates
Definition: adaptive.h:77
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void * Emalloc(int Size)
Definition: emalloc.cpp:47
UNICHARSET unicharset
Definition: ccutil.h:70
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ NormalizeOutlines()

void tesseract::Classify::NormalizeOutlines ( LIST  Outlines,
FLOAT32 * XScale,
FLOAT32 * YScale
)

This routine normalizes every outline in Outlines according to the currently selected normalization method. It also returns the scale factors that it used to do this scaling. The scale factors returned represent the x and y sizes in the normalized coordinate system that correspond to 1 pixel in the original coordinate system.

Globals:

  • classify_norm_method: method being used for normalization
  • classify_char_norm_range: map radius of gyration to this value

Parameters
Outlines: list of outlines to be normalized
XScale: x-direction scale factor used by routine
YScale: y-direction scale factor used by routine
Returns
none (Outlines are changed and XScale and YScale are updated)
Note
Exceptions: none
History: Fri Dec 14 08:14:55 1990, DSJ, Created.

Definition at line 301 of file mfoutline.cpp.

// Normalizes the outlines according to classify_norm_method. Only the
// `baseline` case does any work: it calls NormalizeOutline with an x-origin
// of 0.0 inside the iterate() loop and then writes MF_SCALE_FACTOR to both
// *XScale and *YScale. Other enum values leave the outputs untouched.
303  {
304  MFOUTLINE Outline;
305 
306  switch (classify_norm_method) {
307  case character:
     // The negated string literal is always false, so this assertion is
     // handed a false condition whenever this case is reached.
308  ASSERT_HOST(!"How did NormalizeOutlines get called in character mode?");
309  break;
310 
311  case baseline:
312  iterate(Outlines) {
313  Outline = (MFOUTLINE) first_node(Outlines);
314  NormalizeOutline(Outline, 0.0);
315  }
316  *XScale = *YScale = MF_SCALE_FACTOR;
317  break;
318  }
319 } /* NormalizeOutlines */
#define first_node(l)
Definition: oldlist.h:139
void NormalizeOutline(MFOUTLINE Outline, FLOAT32 XOrigin)
Definition: mfoutline.cpp:265
LIST MFOUTLINE
Definition: mfoutline.h:33
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
#define iterate(l)
Definition: oldlist.h:159
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ PrintAdaptedTemplates()

void tesseract::Classify::PrintAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine prints a summary of the adapted templates in Templates to File.

Parameters
File: open text file to print Templates to
Templates: adapted templates to print to File
Note
Globals: none
Exceptions: none
History: Wed Mar 20 13:35:29 1991, DSJ, Created.

Definition at line 273 of file adaptive.cpp.

// Writes a summary table of the adapted templates to File: a header with the
// class counts, then one row per class for which IsEmptyAdaptedClass is false.
// NOTE(review): this listing omits source line 289, so the arguments for the
// first two format fields (%5d and %s) are not visible here.
273  {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
279  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
280  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
281  fprintf (File, " Id NC NPC NP NPP\n");
282  fprintf (File, "------------------------\n");
283 
284  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
285  IClass = Templates->Templates->Class[i];
286  AClass = Templates->Class[i];
287  if (!IsEmptyAdaptedClass (AClass)) {
     // The four trailing fields are: configs, permanent configs, protos,
     // and protos minus the length of the temp proto list.
288  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
290  IClass->NumConfigs, AClass->NumPermConfigs,
291  IClass->NumProtos,
292  IClass->NumProtos - count (AClass->TempProtos));
293  }
294  }
295  fprintf (File, "\n");
296 
297 } /* PrintAdaptedTemplates */
int count(LIST var_list)
Definition: oldlist.cpp:103
uinT16 NumProtos
Definition: intproto.h:108
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
uinT8 NumPermConfigs
Definition: adaptive.h:65
INT_TEMPLATES Templates
Definition: adaptive.h:77
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
uinT8 NumConfigs
Definition: intproto.h:110
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ PrintAdaptiveMatchResults()

void tesseract::Classify::PrintAdaptiveMatchResults ( const ADAPT_RESULTS results)

This routine prints the matches in results using tprintf.

Parameters
results: match results to print

Globals: none

Note
Exceptions: none
History: Mon Mar 18 09:24:53 1991, DSJ, Created.

Definition at line 2077 of file adaptmatch.cpp.

// Prints every entry of results.match: first the unicharset debug string for
// the entry's unichar_id via tprintf, then the entry's own Print() output.
2077  {
2078  for (int i = 0; i < results.match.size(); ++i) {
2079  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string());
2080  results.match[i].Print();
2081  }
2082 } /* PrintAdaptiveMatchResults */
const char * string() const
Definition: strngs.cpp:201
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
#define tprintf(...)
Definition: tprintf.h:31
int size() const
Definition: genericvector.h:72
UNICHARSET unicharset
Definition: ccutil.h:70
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88

◆ PruneClasses()

int tesseract::Classify::PruneClasses ( const INT_TEMPLATES_STRUCT * int_templates,
int  num_features,
int  keep_this,
const INT_FEATURE_STRUCT * features,
const uinT8 * normalization_factors,
const uinT16 * expected_num_features,
GenericVector< CP_RESULT_STRUCT > *  results 
)

Runs the class pruner from int_templates on the given features, returning the number of classes output in results.

Parameters
int_templates: Class pruner tables
num_features: Number of features in blob
features: Array of features
normalization_factors: Array of fudge factors from blob normalization process (by CLASS_INDEX)
expected_num_features: Array of expected number of features for each class (by CLASS_INDEX)
results: Sorted Array of pruned classes. Must be an array of size at least int_templates->NumClasses.
keep_this: no description is given; the value is forwarded to the pruner's PruneAndSort call

Definition at line 412 of file intmatcher.cpp.

// Drives a local ClassPruner through its stages: scoring, adjustment for the
// expected feature count, optional disabling of classes/fragments,
// normalization, pruning and sorting, and optional debug output. Returns
// whatever pruner.SetupResults(results) returns.
// NOTE(review): this listing omits source lines 423, 428 and 447, so the
// second argument of AdjustForExpectedNumFeatures, the condition guarding
// DisableFragments and one argument of SummarizeResult are not visible here.
417  {
418  ClassPruner pruner(int_templates->NumClasses);
419  // Compute initial match scores for all classes.
420  pruner.ComputeScores(int_templates, num_features, features);
421  // Adjust match scores for number of expected features.
422  pruner.AdjustForExpectedNumFeatures(expected_num_features,
424  // Apply disabled classes in unicharset - only works without a shape_table.
425  if (shape_table_ == NULL)
426  pruner.DisableDisabledClasses(unicharset);
427  // If fragments are disabled, remove them, also only without a shape table.
429  pruner.DisableFragments(unicharset);
430 
431  // If we have good x-heights, apply the given normalization factors.
432  if (normalization_factors != NULL) {
433  pruner.NormalizeForXheight(classify_class_pruner_multiplier,
434  normalization_factors);
435  } else {
436  pruner.NoNormalization();
437  }
438  // Do the actual pruning and sort the short-list.
439  pruner.PruneAndSort(classify_class_pruner_threshold, keep_this,
440  shape_table_ == NULL, unicharset);
441 
     // Debug output is layered: level > 2 adds DebugMatch, level > 1 adds
     // SummarizeResult.
442  if (classify_debug_level > 2) {
443  pruner.DebugMatch(*this, int_templates, features);
444  }
445  if (classify_debug_level > 1) {
446  pruner.SummarizeResult(*this, int_templates, expected_num_features,
448  normalization_factors);
449  }
450  // Convert to the expected output format.
451  return pruner.SetupResults(results);
452 }
ShapeTable * shape_table_
Definition: classify.h:512
bool disable_character_fragments
Definition: classify.h:450
int classify_class_pruner_multiplier
Definition: classify.h:465
int classify_class_pruner_threshold
Definition: classify.h:463
UNICHARSET unicharset
Definition: ccutil.h:70
int classify_cp_cutoff_strength
Definition: classify.h:467

◆ ReadAdaptedTemplates()

ADAPT_TEMPLATES tesseract::Classify::ReadAdaptedTemplates ( FILE *  File)

Read a set of adapted templates from File and return a ptr to the templates.

Parameters
File: open text file to read adapted templates from
Returns
Ptr to adapted templates read from File.
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:18:10 1991, DSJ, Created.

Definition at line 369 of file adaptive.cpp.

// Reads adapted templates from File in three steps: the raw
// ADAPT_TEMPLATES_STRUCT, the integer templates via ReadIntTemplates, and one
// adapted class per integer-template class via ReadAdaptedClass. Returns the
// newly allocated templates.
369  {
370  int i;
371  ADAPT_TEMPLATES Templates;
372 
373  /* first read the high level adaptive template struct */
     // NOTE(review): the fread return value is not checked. The whole struct
     // is filled from file bytes, including its pointer members; the
     // Templates pointer and Class[0..NumClasses-1] are overwritten below.
374  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
375  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
376 
377  /* then read in the basic integer templates */
378  Templates->Templates = ReadIntTemplates (File);
379 
380  /* then read in the adaptive info for each class */
381  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
382  Templates->Class[i] = ReadAdaptedClass (File);
383  }
384  return (Templates);
385 
386 } /* ReadAdaptedTemplates */
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:761
ADAPT_CLASS ReadAdaptedClass(FILE *File)
Definition: adaptive.cpp:313
void * Emalloc(int Size)
Definition: emalloc.cpp:47
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83

◆ ReadIntTemplates()

INT_TEMPLATES tesseract::Classify::ReadIntTemplates ( FILE *  File)

This routine reads a set of integer templates from File. File must already be open and must be in the correct binary format.

Parameters
File: open file to read templates from
Returns
Pointer to integer templates read from File.
Note
Globals: none
Exceptions: none
History: Wed Feb 27 11:48:46 1991, DSJ, Created.

Definition at line 761 of file intproto.cpp.

761  {
762  int i, j, w, x, y, z;
763  BOOL8 swap;
764  int nread;
765  int unicharset_size;
766  int version_id = 0;
767  INT_TEMPLATES Templates;
768  CLASS_PRUNER_STRUCT* Pruner;
769  INT_CLASS Class;
770  uinT8 *Lengths;
771  PROTO_SET ProtoSet;
772 
773  /* variables for conversion from older inttemp formats */
774  int b, bit_number, last_cp_bit_number, new_b, new_i, new_w;
775  CLASS_ID class_id, max_class_id;
776  inT16 *IndexFor = new inT16[MAX_NUM_CLASSES];
777  CLASS_ID *ClassIdFor = new CLASS_ID[MAX_NUM_CLASSES];
778  CLASS_PRUNER_STRUCT **TempClassPruner =
780  uinT32 SetBitsForMask = // word with NUM_BITS_PER_CLASS
781  (1 << NUM_BITS_PER_CLASS) - 1; // set starting at bit 0
782  uinT32 Mask, NewMask, ClassBits;
783  int MaxNumConfigs = MAX_NUM_CONFIGS;
784  int WerdsPerConfigVec = WERDS_PER_CONFIG_VEC;
785 
786  /* first read the high level template struct */
787  Templates = NewIntTemplates();
788  // Read Templates in parts for 64 bit compatibility.
789  if (fread(&unicharset_size, sizeof(int), 1, File) != 1)
790  cprintf("Bad read of inttemp!\n");
791  if (fread(&Templates->NumClasses,
792  sizeof(Templates->NumClasses), 1, File) != 1 ||
793  fread(&Templates->NumClassPruners,
794  sizeof(Templates->NumClassPruners), 1, File) != 1)
795  cprintf("Bad read of inttemp!\n");
796  // Swap status is determined automatically.
797  swap = Templates->NumClassPruners < 0 ||
799  if (swap) {
800  Reverse32(&Templates->NumClassPruners);
801  Reverse32(&Templates->NumClasses);
802  Reverse32(&unicharset_size);
803  }
804  if (Templates->NumClasses < 0) {
805  // This file has a version id!
806  version_id = -Templates->NumClasses;
807  if (fread(&Templates->NumClasses, sizeof(Templates->NumClasses),
808  1, File) != 1)
809  cprintf("Bad read of inttemp!\n");
810  if (swap)
811  Reverse32(&Templates->NumClasses);
812  }
813 
814  if (version_id < 3) {
815  MaxNumConfigs = OLD_MAX_NUM_CONFIGS;
816  WerdsPerConfigVec = OLD_WERDS_PER_CONFIG_VEC;
817  }
818 
819  if (version_id < 2) {
820  for (i = 0; i < unicharset_size; ++i) {
821  if (fread(&IndexFor[i], sizeof(inT16), 1, File) != 1)
822  cprintf("Bad read of inttemp!\n");
823  }
824  for (i = 0; i < Templates->NumClasses; ++i) {
825  if (fread(&ClassIdFor[i], sizeof(CLASS_ID), 1, File) != 1)
826  cprintf("Bad read of inttemp!\n");
827  }
828  if (swap) {
829  for (i = 0; i < Templates->NumClasses; i++)
830  Reverse16(&IndexFor[i]);
831  for (i = 0; i < Templates->NumClasses; i++)
832  Reverse32(&ClassIdFor[i]);
833  }
834  }
835 
836  /* then read in the class pruners */
837  for (i = 0; i < Templates->NumClassPruners; i++) {
838  Pruner = new CLASS_PRUNER_STRUCT;
839  if ((nread =
840  fread(Pruner, 1, sizeof(CLASS_PRUNER_STRUCT),
841  File)) != sizeof(CLASS_PRUNER_STRUCT))
842  cprintf("Bad read of inttemp!\n");
843  if (swap) {
844  for (x = 0; x < NUM_CP_BUCKETS; x++) {
845  for (y = 0; y < NUM_CP_BUCKETS; y++) {
846  for (z = 0; z < NUM_CP_BUCKETS; z++) {
847  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
848  Reverse32(&Pruner->p[x][y][z][w]);
849  }
850  }
851  }
852  }
853  }
854  if (version_id < 2) {
855  TempClassPruner[i] = Pruner;
856  } else {
857  Templates->ClassPruners[i] = Pruner;
858  }
859  }
860 
861  /* fix class pruners if they came from an old version of inttemp */
862  if (version_id < 2) {
863  // Allocate enough class pruners to cover all the class ids.
864  max_class_id = 0;
865  for (i = 0; i < Templates->NumClasses; i++)
866  if (ClassIdFor[i] > max_class_id)
867  max_class_id = ClassIdFor[i];
868  for (i = 0; i <= CPrunerIdFor(max_class_id); i++) {
869  Templates->ClassPruners[i] = new CLASS_PRUNER_STRUCT;
870  memset(Templates->ClassPruners[i], 0, sizeof(CLASS_PRUNER_STRUCT));
871  }
872  // Convert class pruners from the old format (indexed by class index)
873  // to the new format (indexed by class id).
874  last_cp_bit_number = NUM_BITS_PER_CLASS * Templates->NumClasses - 1;
875  for (i = 0; i < Templates->NumClassPruners; i++) {
876  for (x = 0; x < NUM_CP_BUCKETS; x++)
877  for (y = 0; y < NUM_CP_BUCKETS; y++)
878  for (z = 0; z < NUM_CP_BUCKETS; z++)
879  for (w = 0; w < WERDS_PER_CP_VECTOR; w++) {
880  if (TempClassPruner[i]->p[x][y][z][w] == 0)
881  continue;
882  for (b = 0; b < BITS_PER_WERD; b += NUM_BITS_PER_CLASS) {
883  bit_number = i * BITS_PER_CP_VECTOR + w * BITS_PER_WERD + b;
884  if (bit_number > last_cp_bit_number)
885  break; // the rest of the bits in this word are not used
886  class_id = ClassIdFor[bit_number / NUM_BITS_PER_CLASS];
887  // Single out NUM_BITS_PER_CLASS bits relating to class_id.
888  Mask = SetBitsForMask << b;
889  ClassBits = TempClassPruner[i]->p[x][y][z][w] & Mask;
890  // Move these bits to the new position in which they should
891  // appear (indexed corresponding to the class_id).
892  new_i = CPrunerIdFor(class_id);
893  new_w = CPrunerWordIndexFor(class_id);
894  new_b = CPrunerBitIndexFor(class_id) * NUM_BITS_PER_CLASS;
895  if (new_b > b) {
896  ClassBits <<= (new_b - b);
897  } else {
898  ClassBits >>= (b - new_b);
899  }
900  // Copy bits relating to class_id to the correct position
901  // in Templates->ClassPruner.
902  NewMask = SetBitsForMask << new_b;
903  Templates->ClassPruners[new_i]->p[x][y][z][new_w] &= ~NewMask;
904  Templates->ClassPruners[new_i]->p[x][y][z][new_w] |= ClassBits;
905  }
906  }
907  }
908  for (i = 0; i < Templates->NumClassPruners; i++) {
909  delete TempClassPruner[i];
910  }
911  }
912 
913  /* then read in each class */
914  for (i = 0; i < Templates->NumClasses; i++) {
915  /* first read in the high level struct for the class */
916  Class = (INT_CLASS) Emalloc (sizeof (INT_CLASS_STRUCT));
917  if (fread(&Class->NumProtos, sizeof(Class->NumProtos), 1, File) != 1 ||
918  fread(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File) != 1 ||
919  fread(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File) != 1)
920  cprintf ("Bad read of inttemp!\n");
921  if (version_id == 0) {
922  // Only version 0 writes 5 pointless pointers to the file.
923  for (j = 0; j < 5; ++j) {
924  int junk;
925  if (fread(&junk, sizeof(junk), 1, File) != 1)
926  cprintf ("Bad read of inttemp!\n");
927  }
928  }
929  if (version_id < 4) {
930  for (j = 0; j < MaxNumConfigs; ++j) {
931  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
932  cprintf ("Bad read of inttemp!\n");
933  }
934  if (swap) {
935  Reverse16(&Class->NumProtos);
936  for (j = 0; j < MaxNumConfigs; j++)
937  Reverse16(&Class->ConfigLengths[j]);
938  }
939  } else {
940  ASSERT_HOST(Class->NumConfigs < MaxNumConfigs);
941  for (j = 0; j < Class->NumConfigs; ++j) {
942  if (fread(&Class->ConfigLengths[j], sizeof(uinT16), 1, File) != 1)
943  cprintf ("Bad read of inttemp!\n");
944  }
945  if (swap) {
946  Reverse16(&Class->NumProtos);
947  for (j = 0; j < MaxNumConfigs; j++)
948  Reverse16(&Class->ConfigLengths[j]);
949  }
950  }
951  if (version_id < 2) {
952  ClassForClassId (Templates, ClassIdFor[i]) = Class;
953  } else {
954  ClassForClassId (Templates, i) = Class;
955  }
956 
957  /* then read in the proto lengths */
958  Lengths = NULL;
959  if (MaxNumIntProtosIn (Class) > 0) {
960  Lengths = (uinT8 *)Emalloc(sizeof(uinT8) * MaxNumIntProtosIn(Class));
961  if ((nread =
962  fread((char *)Lengths, sizeof(uinT8),
963  MaxNumIntProtosIn(Class), File)) != MaxNumIntProtosIn (Class))
964  cprintf ("Bad read of inttemp!\n");
965  }
966  Class->ProtoLengths = Lengths;
967 
968  /* then read in the proto sets */
969  for (j = 0; j < Class->NumProtoSets; j++) {
970  ProtoSet = (PROTO_SET)Emalloc(sizeof(PROTO_SET_STRUCT));
971  if (version_id < 3) {
972  if ((nread =
973  fread((char *) &ProtoSet->ProtoPruner, 1,
974  sizeof(PROTO_PRUNER), File)) != sizeof(PROTO_PRUNER))
975  cprintf("Bad read of inttemp!\n");
976  for (x = 0; x < PROTOS_PER_PROTO_SET; x++) {
977  if ((nread = fread((char *) &ProtoSet->Protos[x].A, 1,
978  sizeof(inT8), File)) != sizeof(inT8) ||
979  (nread = fread((char *) &ProtoSet->Protos[x].B, 1,
980  sizeof(uinT8), File)) != sizeof(uinT8) ||
981  (nread = fread((char *) &ProtoSet->Protos[x].C, 1,
982  sizeof(inT8), File)) != sizeof(inT8) ||
983  (nread = fread((char *) &ProtoSet->Protos[x].Angle, 1,
984  sizeof(uinT8), File)) != sizeof(uinT8))
985  cprintf("Bad read of inttemp!\n");
986  for (y = 0; y < WerdsPerConfigVec; y++)
987  if ((nread = fread((char *) &ProtoSet->Protos[x].Configs[y], 1,
988  sizeof(uinT32), File)) != sizeof(uinT32))
989  cprintf("Bad read of inttemp!\n");
990  }
991  } else {
992  if ((nread =
993  fread((char *) ProtoSet, 1, sizeof(PROTO_SET_STRUCT),
994  File)) != sizeof(PROTO_SET_STRUCT))
995  cprintf("Bad read of inttemp!\n");
996  }
997  if (swap) {
998  for (x = 0; x < NUM_PP_PARAMS; x++)
999  for (y = 0; y < NUM_PP_BUCKETS; y++)
1000  for (z = 0; z < WERDS_PER_PP_VECTOR; z++)
1001  Reverse32(&ProtoSet->ProtoPruner[x][y][z]);
1002  for (x = 0; x < PROTOS_PER_PROTO_SET; x++)
1003  for (y = 0; y < WerdsPerConfigVec; y++)
1004  Reverse32(&ProtoSet->Protos[x].Configs[y]);
1005  }
1006  Class->ProtoSets[j] = ProtoSet;
1007  }
1008  if (version_id < 4)
1009  Class->font_set_id = -1;
1010  else {
1011  fread(&Class->font_set_id, sizeof(int), 1, File);
1012  if (swap)
1013  Reverse32(&Class->font_set_id);
1014  }
1015  }
1016 
1017  if (version_id < 2) {
1018  /* add an empty NULL class with class id 0 */
1019  assert(UnusedClassIdIn (Templates, 0));
1020  ClassForClassId (Templates, 0) = NewIntClass (1, 1);
1021  ClassForClassId (Templates, 0)->font_set_id = -1;
1022  Templates->NumClasses++;
1023  /* make sure the classes are contiguous */
1024  for (i = 0; i < MAX_NUM_CLASSES; i++) {
1025  if (i < Templates->NumClasses) {
1026  if (ClassForClassId (Templates, i) == NULL) {
1027  fprintf(stderr, "Non-contiguous class ids in inttemp\n");
1028  exit(1);
1029  }
1030  } else {
1031  if (ClassForClassId (Templates, i) != NULL) {
1032  fprintf(stderr, "Class id %d exceeds NumClassesIn (Templates) %d\n",
1033  i, Templates->NumClasses);
1034  exit(1);
1035  }
1036  }
1037  }
1038  }
1039  if (version_id >= 4) {
1040  this->fontinfo_table_.read(File, NewPermanentTessCallback(read_info), swap);
1041  if (version_id >= 5) {
1042  this->fontinfo_table_.read(File,
1044  swap);
1045  }
1046  this->fontset_table_.read(File, NewPermanentTessCallback(read_set), swap);
1047  }
1048 
1049  // Clean up.
1050  delete[] IndexFor;
1051  delete[] ClassIdFor;
1052  delete[] TempClassPruner;
1053 
1054  return (Templates);
1055 } /* ReadIntTemplates */
#define OLD_WERDS_PER_CONFIG_VEC
Definition: intproto.cpp:113
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
uinT16 NumProtos
Definition: intproto.h:108
#define CPrunerIdFor(c)
Definition: intproto.h:183
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
#define WERDS_PER_PP_VECTOR
Definition: intproto.h:62
bool read_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:152
short inT16
Definition: host.h:33
struct INT_CLASS_STRUCT * INT_CLASS
#define NUM_PP_PARAMS
Definition: intproto.h:50
struct PROTO_SET_STRUCT * PROTO_SET
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
void Reverse32(void *ptr)
Definition: helpers.h:193
#define WERDS_PER_CP_VECTOR
Definition: intproto.h:61
#define MAX_NUM_CLASS_PRUNERS
Definition: intproto.h:59
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
#define NUM_BITS_PER_CLASS
Definition: intproto.h:54
#define OLD_MAX_NUM_CONFIGS
Definition: intproto.cpp:112
#define BITS_PER_WERD
Definition: intproto.h:44
unsigned char uinT8
Definition: host.h:32
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
unsigned char BOOL8
Definition: host.h:46
#define PROTOS_PER_PROTO_SET
Definition: intproto.h:48
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define WERDS_PER_CONFIG_VEC
Definition: intproto.h:68
unsigned short uinT16
Definition: host.h:34
uinT8 NumProtoSets
Definition: intproto.h:109
uinT32 PROTO_PRUNER[NUM_PP_PARAMS][NUM_PP_BUCKETS][WERDS_PER_PP_VECTOR]
Definition: intproto.h:92
uinT32 Configs[WERDS_PER_CONFIG_VEC]
Definition: intproto.h:86
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:664
#define BITS_PER_CP_VECTOR
Definition: intproto.h:58
#define CPrunerWordIndexFor(c)
Definition: intproto.h:185
SIGNED char inT8
Definition: host.h:31
#define CPrunerBitIndexFor(c)
Definition: intproto.h:186
INT_PROTO_STRUCT Protos[PROTOS_PER_PROTO_SET]
Definition: intproto.h:97
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
Definition: intproto.h:77
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
void Reverse16(void *ptr)
Definition: helpers.h:188
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
unsigned int uinT32
Definition: host.h:36
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define NUM_CP_BUCKETS
Definition: intproto.h:52
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
bool read_spacing_info(FILE *f, FontInfo *fi, bool swap)
Definition: fontinfo.cpp:177
uinT8 NumConfigs
Definition: intproto.h:110
PROTO_PRUNER ProtoPruner
Definition: intproto.h:96
#define NUM_PP_BUCKETS
Definition: intproto.h:51
#define ASSERT_HOST(x)
Definition: errcode.h:84
uinT8 * ProtoLengths
Definition: intproto.h:112
bool read_set(FILE *f, FontSet *fs, bool swap)
Definition: fontinfo.cpp:240

◆ ReadNewCutoffs()

void tesseract::Classify::ReadNewCutoffs ( FILE *  CutoffFile,
bool  swap,
inT64  end_offset,
CLASS_CUTOFF_ARRAY  Cutoffs 
)

Reads all of the class-id/cutoff pairs from the already-open CutoffFile and inserts them into the Cutoffs array. Cutoffs are indexed in the array by class id. Unused entries in the array are set to an arbitrarily high cutoff value (MAX_CUTOFF).

Parameters
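CutoffFile: open file containing cutoff definitions
Cutoffs: array to put cutoffs into
swap: swap flag forwarded to shapetable_cutoffs_.DeSerialize (only used when shape_table_ is non-NULL)
end_offset: file offset at which to stop reading; a negative value disables the check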
Returns
none
Note
Globals: none
Exceptions: none
History: Wed Feb 20 09:38:26 1991, DSJ, Created.

Definition at line 52 of file cutoffs.cpp.

53  {
54  char Class[UNICHAR_LEN + 1];
55  CLASS_ID ClassId;
56  int Cutoff;
57  int i;
58 
59  if (shape_table_ != NULL) {
60  if (!shapetable_cutoffs_.DeSerialize(swap, CutoffFile)) {
61  tprintf("Error during read of shapetable pffmtable!\n");
62  }
63  }
64  for (i = 0; i < MAX_NUM_CLASSES; i++)
65  Cutoffs[i] = MAX_CUTOFF;
66 
67  while ((end_offset < 0 || ftell(CutoffFile) < end_offset) &&
68  tfscanf(CutoffFile, "%" REALLY_QUOTE_IT(UNICHAR_LEN) "s %d",
69  Class, &Cutoff) == 2) {
70  if (strcmp(Class, "NULL") == 0) {
71  ClassId = unicharset.unichar_to_id(" ");
72  } else {
73  ClassId = unicharset.unichar_to_id(Class);
74  }
75  Cutoffs[ClassId] = Cutoff;
76  SkipNewline(CutoffFile);
77  }
78 }
#define MAX_CUTOFF
Definition: cutoffs.cpp:35
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define UNICHAR_LEN
Definition: unichar.h:30
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
#define tprintf(...)
Definition: tprintf.h:31
ShapeTable * shape_table_
Definition: classify.h:512
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define REALLY_QUOTE_IT(x)
Definition: cutoffs.cpp:33
UNICHARSET unicharset
Definition: ccutil.h:70
void SkipNewline(FILE *file)
Definition: helpers.h:84
bool DeSerialize(bool swap, FILE *fp)

◆ ReadNormProtos()

NORM_PROTOS * tesseract::Classify::ReadNormProtos ( FILE *  File,
inT64  end_offset 
)

This routine allocates a new data structure to hold a set of character normalization protos. It then fills in the data structure by reading from the specified File.

Parameters
File: open text file to read normalization protos from
end_offset: file offset at which to stop reading; a negative value disables the check
Globals: none
Returns
Character normalization protos.
Note
Exceptions: none
History: Wed Dec 19 16:38:49 1990, DSJ, Created.

Definition at line 245 of file normmatch.cpp.

245  {
247  int i;
248  char unichar[2 * UNICHAR_LEN + 1];
249  UNICHAR_ID unichar_id;
250  LIST Protos;
251  int NumProtos;
252 
253  /* allocate and initialization data structure */
254  NormProtos = (NORM_PROTOS *) Emalloc (sizeof (NORM_PROTOS));
256  NormProtos->Protos = (LIST *) Emalloc (NormProtos->NumProtos * sizeof(LIST));
257  for (i = 0; i < NormProtos->NumProtos; i++)
258  NormProtos->Protos[i] = NIL_LIST;
259 
260  /* read file header and save in data structure */
263 
264  /* read protos for each class into a separate list */
265  while ((end_offset < 0 || ftell(File) < end_offset) &&
266  tfscanf(File, "%s %d", unichar, &NumProtos) == 2) {
267  if (unicharset.contains_unichar(unichar)) {
268  unichar_id = unicharset.unichar_to_id(unichar);
269  Protos = NormProtos->Protos[unichar_id];
270  for (i = 0; i < NumProtos; i++)
271  Protos =
272  push_last (Protos, ReadPrototype (File, NormProtos->NumParams));
273  NormProtos->Protos[unichar_id] = Protos;
274  } else {
275  cprintf("Error: unichar %s in normproto file is not in unichar set.\n",
276  unichar);
277  for (i = 0; i < NumProtos; i++)
279  }
280  SkipNewline(File);
281  }
282  return (NormProtos);
283 } /* ReadNormProtos */
PARAM_DESC * ParamDesc
Definition: normmatch.cpp:41
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
PROTOTYPE * ReadPrototype(FILE *File, uinT16 N)
Definition: clusttool.cpp:114
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
int size() const
Definition: unicharset.h:297
LIST * Protos
Definition: normmatch.cpp:42
#define NIL_LIST
Definition: oldlist.h:126
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:332
#define UNICHAR_LEN
Definition: unichar.h:30
void FreePrototype(void *arg)
Definition: cluster.cpp:588
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
PARAM_DESC * ReadParamDesc(FILE *File, uinT16 N)
Definition: clusttool.cpp:67
uinT16 ReadSampleSize(FILE *File)
Definition: clusttool.cpp:44
void * Emalloc(int Size)
Definition: emalloc.cpp:47
UNICHARSET unicharset
Definition: ccutil.h:70
NORM_PROTOS * NormProtos
Definition: classify.h:486
void SkipNewline(FILE *file)
Definition: helpers.h:84
int UNICHAR_ID
Definition: unichar.h:33

◆ RefreshDebugWindow()

void tesseract::Classify::RefreshDebugWindow ( ScrollView **  win,
const char *  msg,
int  y_offset,
const TBOX wbox 
)

Definition at line 220 of file adaptmatch.cpp.

221  {
222  #ifndef GRAPHICS_DISABLED
223  const int kSampleSpaceWidth = 500;
224  if (*win == NULL) {
225  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
226  kSampleSpaceWidth * 2, 200, true);
227  }
228  (*win)->Clear();
229  (*win)->Pen(64, 64, 64);
230  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
231  kSampleSpaceWidth, kBlnBaselineOffset);
232  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
233  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
234  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
235  wbox.right(), wbox.bottom());
236  #endif // GRAPHICS_DISABLED
237 }
const int kBlnBaselineOffset
Definition: normalis.h:29
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
const int kBlnXHeight
Definition: normalis.h:28
inT16 top() const
Definition: rect.h:54
inT16 right() const
Definition: rect.h:75

◆ RemoveBadMatches()

void tesseract::Classify::RemoveBadMatches ( ADAPT_RESULTS * Results)

This routine steps through each matching class in Results and removes it from the match list if its rating falls below the best rating minus a pad (best_rating - matcher_bad_match_pad). In other words, all good matches get moved to the front of the match array, which is then truncated to the good matches.

Parameters
Results: contains matches to be filtered

Globals:

  • matcher_bad_match_pad defines a "bad match"
Note
Exceptions: none
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2100 of file adaptmatch.cpp.

2100  {
2101  int Next, NextGood;
2102  FLOAT32 BadMatchThreshold;
2103  static const char* romans = "i v x I V X";
2104  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2105 
2107  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2108  unicharset.unichar_to_id("1") : -1;
2109  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2110  unicharset.unichar_to_id("0") : -1;
2111  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2112  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2113 
2114  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2115  const UnicharRating& match = Results->match[Next];
2116  if (match.rating >= BadMatchThreshold) {
2117  if (!unicharset.get_isalpha(match.unichar_id) ||
2118  strstr(romans,
2119  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2120  } else if (unicharset.eq(match.unichar_id, "l") &&
2121  scored_one < BadMatchThreshold) {
2122  Results->match[Next].unichar_id = unichar_id_one;
2123  } else if (unicharset.eq(match.unichar_id, "O") &&
2124  scored_zero < BadMatchThreshold) {
2125  Results->match[Next].unichar_id = unichar_id_zero;
2126  } else {
2127  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2128  }
2129  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2130  if (NextGood == Next) {
2131  ++NextGood;
2132  } else {
2133  Results->match[NextGood++] = Results->match[Next];
2134  }
2135  }
2136  }
2137  }
2138  } else {
2139  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2140  if (Results->match[Next].rating >= BadMatchThreshold) {
2141  if (NextGood == Next) {
2142  ++NextGood;
2143  } else {
2144  Results->match[NextGood++] = Results->match[Next];
2145  }
2146  }
2147  }
2148  }
2149  Results->match.truncate(NextGood);
2150 } /* RemoveBadMatches */
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
double matcher_bad_match_pad
Definition: classify.h:423
float FLOAT32
Definition: host.h:44
void truncate(int size)
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
int size() const
Definition: genericvector.h:72
bool classify_bln_numeric_mode
Definition: classify.h:500
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
int UNICHAR_ID
Definition: unichar.h:33

◆ RemoveExtraPuncs()

void tesseract::Classify::RemoveExtraPuncs ( ADAPT_RESULTS * Results)

This routine discards extra digits or punctuation from the results. We keep only the top 2 punctuation answers and the top 1 digit answer if present.

Parameters
Results: contains matches to be filtered
Note
History: Tue Mar 12 13:51:03 1991, DSJ, Created.

Definition at line 2162 of file adaptmatch.cpp.

2162  {
2163  int Next, NextGood;
2164  int punc_count; /*no of garbage characters */
2165  int digit_count;
2166  /*garbage characters */
2167  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2168  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2169 
2170  punc_count = 0;
2171  digit_count = 0;
2172  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2173  const UnicharRating& match = Results->match[Next];
2174  bool keep = true;
2175  if (strstr(punc_chars,
2176  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2177  if (punc_count >= 2)
2178  keep = false;
2179  punc_count++;
2180  } else {
2181  if (strstr(digit_chars,
2182  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2183  if (digit_count >= 1)
2184  keep = false;
2185  digit_count++;
2186  }
2187  }
2188  if (keep) {
2189  if (NextGood == Next) {
2190  ++NextGood;
2191  } else {
2192  Results->match[NextGood++] = match;
2193  }
2194  }
2195  }
2196  Results->match.truncate(NextGood);
2197 } /* RemoveExtraPuncs */
void truncate(int size)
int size() const
Definition: genericvector.h:72
UNICHARSET unicharset
Definition: ccutil.h:70
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88

◆ ResetAdaptiveClassifierInternal()

void tesseract::Classify::ResetAdaptiveClassifierInternal ( )

Definition at line 613 of file adaptmatch.cpp.

613  {
615  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
616  NumAdaptationsFailed);
617  }
620  if (BackupAdaptedTemplates != NULL)
622  BackupAdaptedTemplates = NULL;
623  NumAdaptationsFailed = 0;
624 }
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167

◆ SetAdaptiveThreshold()

void tesseract::Classify::SetAdaptiveThreshold ( FLOAT32  Threshold)

This routine resets the internal thresholds inside the integer matcher to correspond to the specified threshold.

Parameters
Threshold: threshold for creating new templates

Globals:

  • matcher_good_threshold default good match rating
Note
Exceptions: none
History: Tue Apr 9 08:33:13 1991, DSJ, Created.

Definition at line 2213 of file adaptmatch.cpp.

2213  {
2214  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2216  ClipToRange<int>(255 * Threshold, 0, 255));
2218  ClipToRange<int>(255 * Threshold, 0, 255));
2219 } /* SetAdaptiveThreshold */
double matcher_good_threshold
Definition: classify.h:420
int classify_adapt_feature_threshold
Definition: classify.h:447
int classify_adapt_proto_threshold
Definition: classify.h:445

◆ SetStaticClassifier()

void tesseract::Classify::SetStaticClassifier ( ShapeClassifier * static_classifier)

Definition at line 204 of file classify.cpp.

204  {
205  delete static_classifier_;
206  static_classifier_ = static_classifier;
207 }

◆ SettupPass1()

void tesseract::Classify::SettupPass1 ( )

This routine prepares the adaptive matcher for the start of the first pass. Learning is enabled (unless it is disabled for the whole program).

Note
this is somewhat redundant, it simply says that if learning is enabled then it will remain enabled on the first pass. If it is disabled, then it will remain disabled. This is only put here to make it very clear that learning is controlled directly by the global setting of EnableLearning.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 670 of file adaptmatch.cpp.

670  {
672 
674 
675 } /* SettupPass1 */
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:369
bool classify_enable_learning
Definition: classify.h:389
Dict & getDict()
Definition: classify.h:65

◆ SettupPass2()

void tesseract::Classify::SettupPass2 ( )

This routine prepares the adaptive matcher for the start of the second pass. Further learning is disabled.

Globals:

Note
Exceptions: none
History: Mon Apr 15 16:39:29 1991, DSJ, Created.

Definition at line 690 of file adaptmatch.cpp.

690  {
693 
694 } /* SettupPass2 */
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:373
#define FALSE
Definition: capi.h:46
Dict & getDict()
Definition: classify.h:65

◆ SetupBLCNDenorms()

void tesseract::Classify::SetupBLCNDenorms ( const TBLOB & blob,
bool  nonlinear_norm,
DENORM * bl_denorm,
DENORM * cn_denorm,
INT_FX_RESULT_STRUCT * fx_info 
)
static

Definition at line 133 of file intfx.cpp.

135  {
136  // Compute 1st and 2nd moments of the original outline.
137  FCOORD center, second_moments;
138  int length = blob.ComputeMoments(&center, &second_moments);
139  if (fx_info != NULL) {
140  fx_info->Length = length;
141  fx_info->Rx = IntCastRounded(second_moments.y());
142  fx_info->Ry = IntCastRounded(second_moments.x());
143 
144  fx_info->Xmean = IntCastRounded(center.x());
145  fx_info->Ymean = IntCastRounded(center.y());
146  }
147  // Setup the denorm for Baseline normalization.
148  bl_denorm->SetupNormalization(NULL, NULL, &blob.denorm(), center.x(), 128.0f,
149  1.0f, 1.0f, 128.0f, 128.0f);
150  // Setup the denorm for character normalization.
151  if (nonlinear_norm) {
154  TBOX box;
155  blob.GetPreciseBoundingBox(&box);
156  box.pad(1, 1);
157  blob.GetEdgeCoords(box, &x_coords, &y_coords);
158  cn_denorm->SetupNonLinear(&blob.denorm(), box, MAX_UINT8, MAX_UINT8,
159  0.0f, 0.0f, x_coords, y_coords);
160  } else {
161  cn_denorm->SetupNormalization(NULL, NULL, &blob.denorm(),
162  center.x(), center.y(),
163  51.2f / second_moments.x(),
164  51.2f / second_moments.y(),
165  128.0f, 128.0f);
166  }
167 }
void GetEdgeCoords(const TBOX &box, GenericVector< GenericVector< int > > *x_coords, GenericVector< GenericVector< int > > *y_coords) const
Definition: blobs.cpp:570
int IntCastRounded(double x)
Definition: helpers.h:172
void SetupNonLinear(const DENORM *predecessor, const TBOX &box, float target_width, float target_height, float final_xshift, float final_yshift, const GenericVector< GenericVector< int > > &x_coords, const GenericVector< GenericVector< int > > &y_coords)
Definition: normalis.cpp:267
void SetupNormalization(const BLOCK *block, const FCOORD *rotation, const DENORM *predecessor, float x_origin, float y_origin, float x_scale, float y_scale, float final_xshift, float final_yshift)
Definition: normalis.cpp:95
#define MAX_UINT8
Definition: host.h:54
float y() const
Definition: points.h:212
const DENORM & denorm() const
Definition: blobs.h:340
void GetPreciseBoundingBox(TBOX *precise_box) const
Definition: blobs.cpp:554
Definition: points.h:189
void pad(int xpad, int ypad)
Definition: rect.h:127
float x() const
Definition: points.h:209
int ComputeMoments(FCOORD *center, FCOORD *second_moments) const
Definition: blobs.cpp:535
Definition: rect.h:30

◆ shape_table()

const ShapeTable* tesseract::Classify::shape_table ( ) const
inline

Definition at line 69 of file classify.h.

69  {
70  return shape_table_;
71  }
ShapeTable * shape_table_
Definition: classify.h:512

◆ ShapeIDToClassID()

int tesseract::Classify::ShapeIDToClassID ( int  shape_id) const

Definition at line 2297 of file adaptmatch.cpp.

2297  {
2298  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2299  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2300  ASSERT_HOST(font_set_id >= 0);
2301  const FontSet &fs = fontset_table_.get(font_set_id);
2302  for (int config = 0; config < fs.size; ++config) {
2303  if (fs.configs[config] == shape_id)
2304  return id;
2305  }
2306  }
2307  tprintf("Shape %d not found\n", shape_id);
2308  return -1;
2309 }
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define tprintf(...)
Definition: tprintf.h:31
#define ASSERT_HOST(x)
Definition: errcode.h:84
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124

◆ ShowBestMatchFor()

void tesseract::Classify::ShowBestMatchFor ( int  shape_id,
const INT_FEATURE_STRUCT *  features,
int  num_features 
)

This routine displays debug information for the best config of the given shape_id for the given set of features.

Parameters
shape_id: classifier id to work with
features: features of the unknown character
num_features: Number of features in the features array.
Note
Exceptions: none
History: Fri Mar 22 08:43:52 1991, DSJ, Created.

Definition at line 2234 of file adaptmatch.cpp.

2236  {
2237 #ifndef GRAPHICS_DISABLED
2238  uinT32 config_mask;
2239  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2240  tprintf("No built-in templates for class/shape %d\n", shape_id);
2241  return;
2242  }
2243  if (num_features <= 0) {
2244  tprintf("Illegal blob (char norm features)!\n");
2245  return;
2246  }
2247  UnicharRating cn_result;
2248  classify_norm_method.set_value(character);
2251  num_features, features, &cn_result,
2254  tprintf("\n");
2255  config_mask = 1 << cn_result.config;
2256 
2257  tprintf("Static Shape ID: %d\n", shape_id);
2258  ShowMatchDisplay();
2260  AllProtosOn, reinterpret_cast<BIT_VECTOR>(&config_mask),
2261  num_features, features, &cn_result,
2266 #endif // GRAPHICS_DISABLED
2267 } /* ShowBestMatchFor */
#define NO_DEBUG
Definition: adaptmatch.cpp:70
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
int classify_adapt_feature_threshold
Definition: classify.h:447
void UpdateMatchDisplay()
Definition: intproto.cpp:467
#define ClassForClassId(T, c)
Definition: intproto.h:181
bool matcher_debug_separate_windows
Definition: classify.h:458
BIT_VECTOR AllProtosOn
Definition: classify.h:480
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
#define tprintf(...)
Definition: tprintf.h:31
unsigned int uinT32
Definition: host.h:36
IntegerMatcher im_
Definition: classify.h:503

◆ ShowMatchDisplay()

void tesseract::Classify::ShowMatchDisplay ( )

This routine sends the shapes in the global display lists to the match debugger window.

Globals:

  • FeatureShapes: display list containing feature matches
  • ProtoShapes: display list containing proto matches

Returns
none
Note
Exceptions: none
History: Thu Mar 21 15:47:33 1991, DSJ, Created.

Definition at line 1070 of file intproto.cpp.

1070  {
1072  if (ProtoDisplayWindow) {
1074  }
1075  if (FeatureDisplayWindow) {
1077  }
1079  static_cast<NORM_METHOD>(static_cast<int>(classify_norm_method)),
1080  IntMatchWindow);
1082  INT_MAX_X, INT_MAX_Y);
1083  if (ProtoDisplayWindow) {
1085  INT_MAX_X, INT_MAX_Y);
1086  }
1087  if (FeatureDisplayWindow) {
1089  INT_MAX_X, INT_MAX_Y);
1090  }
1091 } /* ShowMatchDisplay */
ScrollView * FeatureDisplayWindow
Definition: intproto.cpp:180
void InitIntMatchWindowIfReqd()
Definition: intproto.cpp:1879
#define INT_MIN_Y
Definition: intproto.cpp:64
#define INT_MAX_X
Definition: intproto.cpp:65
#define INT_MIN_X
Definition: intproto.cpp:63
ScrollView * ProtoDisplayWindow
Definition: intproto.cpp:181
ScrollView * IntMatchWindow
Definition: intproto.cpp:179
void ClearFeatureSpaceWindow(NORM_METHOD norm_method, ScrollView *window)
Definition: intproto.cpp:1095
void Clear()
Definition: scrollview.cpp:595
#define INT_MAX_Y
Definition: intproto.cpp:66
void ZoomToRectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:765

◆ StartBackupAdaptiveClassifier()

void tesseract::Classify::StartBackupAdaptiveClassifier ( )

Definition at line 644 of file adaptmatch.cpp.

644  {
645  if (BackupAdaptedTemplates != NULL)
648 }
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167

◆ SwitchAdaptiveClassifier()

void tesseract::Classify::SwitchAdaptiveClassifier ( )

Definition at line 628 of file adaptmatch.cpp.

628  {
629  if (BackupAdaptedTemplates == NULL) {
631  return;
632  }
634  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
635  NumAdaptationsFailed);
636  }
639  BackupAdaptedTemplates = NULL;
640  NumAdaptationsFailed = 0;
641 }
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:613
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477

◆ TempConfigReliable()

bool tesseract::Classify::TempConfigReliable ( CLASS_ID  class_id,
const TEMP_CONFIG &  config 
)

Definition at line 2313 of file adaptmatch.cpp.

2314  {
2315  if (classify_learning_debug_level >= 1) {
2316  tprintf("NumTimesSeen for config of %s is %d\n",
2317  getDict().getUnicharset().debug_str(class_id).string(),
2318  config->NumTimesSeen);
2319  }
2321  return true;
2322  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2323  return false;
2324  } else if (use_ambigs_for_adaption) {
2325  // Go through the ambigs vector and see whether we have already seen
2326  // enough times all the characters represented by the ambigs vector.
2327  const UnicharIdVector *ambigs =
2329  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2330  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2331  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2332  assert(ambig_class != NULL);
2333  if (ambig_class->NumPermConfigs == 0 &&
2334  ambig_class->MaxNumTimesSeen <
2336  if (classify_learning_debug_level >= 1) {
2337  tprintf("Ambig %s has not been seen enough times,"
2338  " not making config for %s permanent\n",
2339  getDict().getUnicharset().debug_str(
2340  (*ambigs)[ambig]).string(),
2341  getDict().getUnicharset().debug_str(class_id).string());
2342  }
2343  return false;
2344  }
2345  }
2346  }
2347  return true;
2348 }
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
bool use_ambigs_for_adaption
Definition: ccutil.h:91
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
uinT8 NumPermConfigs
Definition: adaptive.h:65
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:192
uinT8 NumTimesSeen
Definition: adaptive.h:41
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:103
Dict & getDict()
Definition: classify.h:65
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
int matcher_min_examples_for_prototyping
Definition: classify.h:428
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
int size() const
Definition: genericvector.h:72
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ UpdateAmbigsGroup()

void tesseract::Classify::UpdateAmbigsGroup ( CLASS_ID  class_id,
TBLOB *  Blob 
)

Definition at line 2350 of file adaptmatch.cpp.

2350  {
2351  const UnicharIdVector *ambigs =
2353  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2354  if (classify_learning_debug_level >= 1) {
2355  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2356  getDict().getUnicharset().debug_str(class_id).string(), class_id);
2357  }
2358  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2359  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2360  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
2361  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2362  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2363  const TEMP_CONFIG config =
2364  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2365  if (config != NULL && TempConfigReliable(ambig_class_id, config)) {
2366  if (classify_learning_debug_level >= 1) {
2367  tprintf("Making config %d of %s permanent\n", cfg,
2368  getDict().getUnicharset().debug_str(
2369  ambig_class_id).string());
2370  }
2371  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2372  }
2373  }
2374  }
2375 }
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
GenericVector< UNICHAR_ID > UnicharIdVector
Definition: ambigs.h:34
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:103
Dict & getDict()
Definition: classify.h:65
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:201
#define tprintf(...)
Definition: tprintf.h:31
int classify_learning_debug_level
Definition: classify.h:419
int size() const
Definition: genericvector.h:72
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81

◆ WriteAdaptedTemplates()

void tesseract::Classify::WriteAdaptedTemplates ( FILE *  File,
ADAPT_TEMPLATES  Templates 
)

This routine saves Templates to File in a binary format.

Parameters
File: open text file to write Templates to
Templates: set of adapted templates to write to File
Note
Globals: none
Exceptions: none
History: Mon Mar 18 15:07:32 1991, DSJ, Created.

Definition at line 505 of file adaptive.cpp.

505  {
506  int i;
507 
508  /* first write the high level adaptive template struct */
509  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
510 
511  /* then write out the basic integer templates */
512  WriteIntTemplates (File, Templates->Templates, unicharset);
513 
514  /* then write out the adaptive info for each class */
515  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
516  WriteAdaptedClass (File, Templates->Class[i],
517  Templates->Templates->Class[i]->NumConfigs);
518  }
519 } /* WriteAdaptedTemplates */
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:459
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1129
UNICHARSET unicharset
Definition: ccutil.h:70

◆ WriteIntTemplates()

void tesseract::Classify::WriteIntTemplates ( FILE *  File,
INT_TEMPLATES  Templates,
const UNICHARSET &  target_unicharset 
)

This routine writes Templates to File. The format is an efficient binary format. File must already be open for writing.

Parameters
File: open file to write templates to
Templates: templates to save into File
target_unicharset: the UNICHARSET to use
Returns
none
Note
Globals: none
Exceptions: none
History: Wed Feb 27 11:48:46 1991, DSJ, Created.

Definition at line 1129 of file intproto.cpp.

1130  {
1131  int i, j;
1132  INT_CLASS Class;
1133  int unicharset_size = target_unicharset.size();
1134  int version_id = -5; // When negated by the reader -1 becomes +1 etc.
1135 
1136  if (Templates->NumClasses != unicharset_size) {
1137  cprintf("Warning: executing WriteIntTemplates() with %d classes in"
1138  " Templates, while target_unicharset size is %d\n",
1139  Templates->NumClasses, unicharset_size);
1140  }
1141 
1142  /* first write the high level template struct */
1143  fwrite(&unicharset_size, sizeof(unicharset_size), 1, File);
1144  fwrite(&version_id, sizeof(version_id), 1, File);
1145  fwrite(&Templates->NumClassPruners, sizeof(Templates->NumClassPruners),
1146  1, File);
1147  fwrite(&Templates->NumClasses, sizeof(Templates->NumClasses), 1, File);
1148 
1149  /* then write out the class pruners */
1150  for (i = 0; i < Templates->NumClassPruners; i++)
1151  fwrite(Templates->ClassPruners[i],
1152  sizeof(CLASS_PRUNER_STRUCT), 1, File);
1153 
1154  /* then write out each class */
1155  for (i = 0; i < Templates->NumClasses; i++) {
1156  Class = Templates->Class[i];
1157 
1158  /* first write out the high level struct for the class */
1159  fwrite(&Class->NumProtos, sizeof(Class->NumProtos), 1, File);
1160  fwrite(&Class->NumProtoSets, sizeof(Class->NumProtoSets), 1, File);
1161  ASSERT_HOST(Class->NumConfigs == this->fontset_table_.get(Class->font_set_id).size);
1162  fwrite(&Class->NumConfigs, sizeof(Class->NumConfigs), 1, File);
1163  for (j = 0; j < Class->NumConfigs; ++j) {
1164  fwrite(&Class->ConfigLengths[j], sizeof(uinT16), 1, File);
1165  }
1166 
1167  /* then write out the proto lengths */
1168  if (MaxNumIntProtosIn (Class) > 0) {
1169  fwrite ((char *) (Class->ProtoLengths), sizeof (uinT8),
1170  MaxNumIntProtosIn (Class), File);
1171  }
1172 
1173  /* then write out the proto sets */
1174  for (j = 0; j < Class->NumProtoSets; j++)
1175  fwrite ((char *) Class->ProtoSets[j],
1176  sizeof (PROTO_SET_STRUCT), 1, File);
1177 
1178  /* then write the fonts info */
1179  fwrite(&Class->font_set_id, sizeof(int), 1, File);
1180  }
1181 
1182  /* Write the fonts info tables */
1184  this->fontinfo_table_.write(File,
1187 } /* WriteIntTemplates */
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
uinT16 NumProtos
Definition: intproto.h:108
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
int size() const
Definition: unicharset.h:297
unsigned char uinT8
Definition: host.h:32
#define MaxNumIntProtosIn(C)
Definition: intproto.h:168
bool write_spacing_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:211
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
bool write_set(FILE *f, const FontSet &fs)
Definition: fontinfo.cpp:253
unsigned short uinT16
Definition: host.h:34
uinT8 NumProtoSets
Definition: intproto.h:109
PROTO_SET ProtoSets[MAX_NUM_PROTO_SETS]
Definition: intproto.h:111
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
Definition: intproto.h:125
uinT16 ConfigLengths[MAX_NUM_CONFIGS]
Definition: intproto.h:113
uinT8 NumConfigs
Definition: intproto.h:110
#define ASSERT_HOST(x)
Definition: errcode.h:84
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
bool write_info(FILE *f, const FontInfo &fi)
Definition: fontinfo.cpp:168
uinT8 * ProtoLengths
Definition: intproto.h:112

◆ WriteTRFile()

bool tesseract::Classify::WriteTRFile ( const STRING filename)

Definition at line 97 of file blobclass.cpp.

97  {
98  STRING tr_filename = filename + ".tr";
99  FILE* fp = Efopen(tr_filename.string(), "wb");
100  int len = tr_file_data_.length();
101  bool result =
102  fwrite(&tr_file_data_[0], sizeof(tr_file_data_[0]), len, fp) == len;
103  fclose(fp);
104  tr_file_data_.truncate_at(0);
105  return result;
106 }
inT32 length() const
Definition: strngs.cpp:196
void truncate_at(inT32 index)
Definition: strngs.cpp:272
const char * string() const
Definition: strngs.cpp:201
FILE * Efopen(const char *Name, const char *Mode)
Definition: efio.cpp:43
Definition: strngs.h:44

Member Data Documentation

◆ AdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::AdaptedTemplates

Definition at line 473 of file classify.h.

◆ AllConfigsOff

BIT_VECTOR tesseract::Classify::AllConfigsOff

Definition at line 482 of file classify.h.

◆ AllConfigsOn

BIT_VECTOR tesseract::Classify::AllConfigsOn

Definition at line 481 of file classify.h.

◆ allow_blob_division

bool tesseract::Classify::allow_blob_division = true

"Use divisible blobs chopping"

Definition at line 382 of file classify.h.

◆ AllProtosOn

BIT_VECTOR tesseract::Classify::AllProtosOn

Definition at line 480 of file classify.h.

◆ BackupAdaptedTemplates

ADAPT_TEMPLATES tesseract::Classify::BackupAdaptedTemplates

Definition at line 477 of file classify.h.

◆ certainty_scale

double tesseract::Classify::certainty_scale = 20.0

"Certainty scaling factor"

Definition at line 437 of file classify.h.

◆ classify_adapt_feature_threshold

int tesseract::Classify::classify_adapt_feature_threshold = 230

"Threshold for good features during adaptive 0-255"

Definition at line 447 of file classify.h.

◆ classify_adapt_proto_threshold

int tesseract::Classify::classify_adapt_proto_threshold = 230

"Threshold for good protos during adaptive 0-255"

Definition at line 445 of file classify.h.

◆ classify_adapted_pruning_factor

double tesseract::Classify::classify_adapted_pruning_factor = 2.5

"Prune poor adapted results this much worse than best result"

Definition at line 441 of file classify.h.

◆ classify_adapted_pruning_threshold

double tesseract::Classify::classify_adapted_pruning_threshold = -1.0

"Threshold at which classify_adapted_pruning_factor starts"

Definition at line 443 of file classify.h.

◆ classify_bln_numeric_mode

bool tesseract::Classify::classify_bln_numeric_mode = 0

"Assume the input is numbers [0-9]."

Definition at line 500 of file classify.h.

◆ classify_char_norm_range

double tesseract::Classify::classify_char_norm_range = 0.2

"Character Normalization Range ..."

Definition at line 396 of file classify.h.

◆ classify_character_fragments_garbage_certainty_threshold

double tesseract::Classify::classify_character_fragments_garbage_certainty_threshold = -3.0

"Exclude fragments that do not match any whole character" " with at least this certainty"

Definition at line 453 of file classify.h.

◆ classify_class_pruner_multiplier

int tesseract::Classify::classify_class_pruner_multiplier = 15

"Class Pruner Multiplier 0-255: "

Definition at line 465 of file classify.h.

◆ classify_class_pruner_threshold

int tesseract::Classify::classify_class_pruner_threshold = 229

"Class Pruner Threshold 0-255"

Definition at line 463 of file classify.h.

◆ classify_cp_cutoff_strength

int tesseract::Classify::classify_cp_cutoff_strength = 7

"Class Pruner CutoffStrength: "

Definition at line 467 of file classify.h.

◆ classify_debug_character_fragments

bool tesseract::Classify::classify_debug_character_fragments = FALSE

"Bring up graphical debugging windows for fragments training"

Definition at line 455 of file classify.h.

◆ classify_debug_level

int tesseract::Classify::classify_debug_level = 0

"Classify debug level"

Definition at line 390 of file classify.h.

◆ classify_enable_adaptive_debugger

bool tesseract::Classify::classify_enable_adaptive_debugger = 0

"Enable match debugger"

Definition at line 414 of file classify.h.

◆ classify_enable_adaptive_matcher

bool tesseract::Classify::classify_enable_adaptive_matcher = 1

"Enable adaptive classifier"

Definition at line 409 of file classify.h.

◆ classify_enable_learning

bool tesseract::Classify::classify_enable_learning = true

"Enable adaptive classifier"

Definition at line 389 of file classify.h.

◆ classify_integer_matcher_multiplier

int tesseract::Classify::classify_integer_matcher_multiplier = 10

"Integer Matcher Multiplier 0-255: "

Definition at line 469 of file classify.h.

◆ classify_learn_debug_str

char* tesseract::Classify::classify_learn_debug_str = ""

"Class str to debug learning"

Definition at line 459 of file classify.h.

◆ classify_learning_debug_level

int tesseract::Classify::classify_learning_debug_level = 0

"Learning Debug Level: "

Definition at line 419 of file classify.h.

◆ classify_max_certainty_margin

double tesseract::Classify::classify_max_certainty_margin = 5.5

"Veto difference between classifier certainties"

Definition at line 404 of file classify.h.

◆ classify_max_norm_scale_x

double tesseract::Classify::classify_max_norm_scale_x = 0.325

"Max char x-norm scale ..."

Definition at line 398 of file classify.h.

◆ classify_max_norm_scale_y

double tesseract::Classify::classify_max_norm_scale_y = 0.325

"Max char y-norm scale ..."

Definition at line 400 of file classify.h.

◆ classify_max_rating_ratio

double tesseract::Classify::classify_max_rating_ratio = 1.5

"Veto ratio between classifier ratings"

Definition at line 402 of file classify.h.

◆ classify_min_norm_scale_x

double tesseract::Classify::classify_min_norm_scale_x = 0.0

"Min char x-norm scale ..."

Definition at line 397 of file classify.h.

◆ classify_min_norm_scale_y

double tesseract::Classify::classify_min_norm_scale_y = 0.0

"Min char y-norm scale ..."

Definition at line 399 of file classify.h.

◆ classify_misfit_junk_penalty

double tesseract::Classify::classify_misfit_junk_penalty = 0.0

"Penalty to apply when a non-alnum is vertically out of " "its expected textline position"

Definition at line 435 of file classify.h.

◆ classify_nonlinear_norm

bool tesseract::Classify::classify_nonlinear_norm = 0

"Non-linear stroke-density normalization"

Definition at line 416 of file classify.h.

◆ classify_norm_method

int tesseract::Classify::classify_norm_method = character

"Normalization Method ..."

Definition at line 394 of file classify.h.

◆ classify_save_adapted_templates

bool tesseract::Classify::classify_save_adapted_templates = 0

"Save adapted templates to a file"

Definition at line 413 of file classify.h.

◆ classify_use_pre_adapted_templates

bool tesseract::Classify::classify_use_pre_adapted_templates = 0

"Use pre-adapted classifier templates"

Definition at line 411 of file classify.h.

◆ disable_character_fragments

bool tesseract::Classify::disable_character_fragments = TRUE

"Do not include character fragments in the" " results of the classifier"

Definition at line 450 of file classify.h.

◆ EnableLearning

bool tesseract::Classify::EnableLearning

Definition at line 484 of file classify.h.

◆ feature_defs_

FEATURE_DEFS_STRUCT tesseract::Classify::feature_defs_
protected

Definition at line 507 of file classify.h.

◆ fontinfo_table_

UnicityTable<FontInfo> tesseract::Classify::fontinfo_table_

Definition at line 488 of file classify.h.

◆ fontset_table_

UnicityTable<FontSet> tesseract::Classify::fontset_table_

Definition at line 496 of file classify.h.

◆ il1_adaption_test

int tesseract::Classify::il1_adaption_test = 0

"Don't adapt to i/I at beginning of word"

Definition at line 498 of file classify.h.

◆ im_

IntegerMatcher tesseract::Classify::im_
protected

Definition at line 503 of file classify.h.

◆ matcher_avg_noise_size

double tesseract::Classify::matcher_avg_noise_size = 12.0

"Avg. noise blob length: "

Definition at line 425 of file classify.h.

◆ matcher_bad_match_pad

double tesseract::Classify::matcher_bad_match_pad = 0.15

"Bad Match Pad (0-1)"

Definition at line 423 of file classify.h.

◆ matcher_clustering_max_angle_delta

double tesseract::Classify::matcher_clustering_max_angle_delta = 0.015

"Maximum angle delta for prototype clustering"

Definition at line 432 of file classify.h.

◆ matcher_debug_flags

int tesseract::Classify::matcher_debug_flags = 0

"Matcher Debug Flags"

Definition at line 418 of file classify.h.

◆ matcher_debug_level

int tesseract::Classify::matcher_debug_level = 0

"Matcher Debug Level"

Definition at line 417 of file classify.h.

◆ matcher_debug_separate_windows

bool tesseract::Classify::matcher_debug_separate_windows = FALSE

"Use two different windows for debugging the matching: " "One for the protos and one for the features."

Definition at line 458 of file classify.h.

◆ matcher_good_threshold

double tesseract::Classify::matcher_good_threshold = 0.125

"Good Match (0-1)"

Definition at line 420 of file classify.h.

◆ matcher_min_examples_for_prototyping

int tesseract::Classify::matcher_min_examples_for_prototyping = 3

"Reliable Config Threshold"

Definition at line 428 of file classify.h.

◆ matcher_perfect_threshold

double tesseract::Classify::matcher_perfect_threshold = 0.02

"Perfect Match (0-1)"

Definition at line 422 of file classify.h.

◆ matcher_permanent_classes_min

int tesseract::Classify::matcher_permanent_classes_min = 1

"Min # of permanent classes"

Definition at line 426 of file classify.h.

◆ matcher_rating_margin

double tesseract::Classify::matcher_rating_margin = 0.1

"New template margin (0-1)"

Definition at line 424 of file classify.h.

◆ matcher_reliable_adaptive_result

double tesseract::Classify::matcher_reliable_adaptive_result = 0.0

"Great Match (0-1)"

Definition at line 421 of file classify.h.

◆ matcher_sufficient_examples_for_prototyping

int tesseract::Classify::matcher_sufficient_examples_for_prototyping = 5

"Enable adaption even if the ambiguities have not been seen"

Definition at line 430 of file classify.h.

◆ NormProtos

NORM_PROTOS* tesseract::Classify::NormProtos

Definition at line 486 of file classify.h.

◆ PreTrainedTemplates

INT_TEMPLATES tesseract::Classify::PreTrainedTemplates

Definition at line 469 of file classify.h.

◆ prioritize_division

bool tesseract::Classify::prioritize_division = FALSE

"Prioritize blob division over chopping"

Definition at line 387 of file classify.h.

◆ rating_scale

double tesseract::Classify::rating_scale = 1.5

"Rating scaling factor"

Definition at line 436 of file classify.h.

◆ shape_table_

ShapeTable* tesseract::Classify::shape_table_
protected

Definition at line 512 of file classify.h.

◆ speckle_large_max_size

double tesseract::Classify::speckle_large_max_size = 0.30

"Max large speckle size"

Definition at line 501 of file classify.h.

◆ speckle_rating_penalty

double tesseract::Classify::speckle_rating_penalty = 10.0

"Penalty to add to worst rating for noise"

Definition at line 503 of file classify.h.

◆ TempProtoMask

BIT_VECTOR tesseract::Classify::TempProtoMask

Definition at line 483 of file classify.h.

◆ tess_bn_matching

bool tesseract::Classify::tess_bn_matching = 0

"Baseline Normalized Matching"

Definition at line 408 of file classify.h.

◆ tess_cn_matching

bool tesseract::Classify::tess_cn_matching = 0

"Character Normalized Matching"

Definition at line 407 of file classify.h.

◆ tessedit_class_miss_scale

double tesseract::Classify::tessedit_class_miss_scale = 0.00390625

"Scale factor for features not used"

Definition at line 439 of file classify.h.

◆ tessedit_single_match

int tesseract::Classify::tessedit_single_match = FALSE

"Top choice only from CP"

Definition at line 388 of file classify.h.


The documentation for this class was generated from the following files: