tesseract  3.05.02
intmatcher.h
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: intmatcher.h
3  ** Purpose: Interface to high level generic classifier routines.
4  ** Author: Robert Moss
5  ** History: Wed Feb 13 15:24:15 MST 1991, RWM, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 #ifndef INTMATCHER_H
19 #define INTMATCHER_H
20 
21 #include "params.h"
22 
23 // Character fragments could be present in the trained templates
24 // but turned on/off on a language-by-language basis or depending
25 // on particular properties of the corpus (e.g. when we expect the
26 // images to have low exposure).
28  "Do not include character fragments in the"
29  " results of the classifier");
30 
32  "Integer Matcher Multiplier 0-255: ");
33 
34 
38 #include "intproto.h"
39 #include "cutoffs.h"
40 
41 namespace tesseract {
42 struct UnicharRating;  // forward declaration; this header only uses it through pointers
43 }
44 
46  CP_RESULT_STRUCT() : Rating(0.0f), Class(0) {}
47 
50 };
51 
52 /*----------------------------------------------------------------------------
53  Variables
54 -----------------------------------------------------------------------------*/
55 
57  "Threshold for good protos during adaptive 0-255: ");
58 
60  "Threshold for good features during adaptive 0-255: ");
61 
66 #define SE_TABLE_BITS 9  // log2 of SE_TABLE_SIZE (1 << 9 == 512)
67 #define SE_TABLE_SIZE 512  // element count of IntegerMatcher::similarity_evidence_table_
68 
73 
74  void Clear(const INT_CLASS class_template);
75  void ClearFeatureEvidence(const INT_CLASS class_template);
76  void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures,
77  inT32 used_features);
79  INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures);
80 };
81 
82 
84  public:
85  // Integer Matcher Theta Fudge (0-255).
86  static const int kIntThetaFudge = 128;
87  // Bits in Similarity to Evidence Lookup (8-9).
88  static const int kEvidenceTableBits = 9;  // same value as the SE_TABLE_BITS macro
89  // Integer Evidence Truncation Bits (8-14).
90  static const int kIntEvidenceTruncBits = 14;
91  // Similarity to Evidence Table Exponential Multiplier.
92  static const float kSEExponentialMultiplier;  // no in-class initializer; value is defined elsewhere (not visible here)
93  // Center of Similarity Curve.
94  static const float kSimilarityCenter;  // no in-class initializer; value is defined elsewhere (not visible here)
95 
96  IntegerMatcher() : classify_debug_level_(0) {}  // only classify_debug_level_ is set (to null); the other data members are left uninitialized here
97 
98  void Init(tesseract::IntParam *classify_debug_level);
99 
100  void Match(INT_CLASS ClassTemplate,
101  BIT_VECTOR ProtoMask,
102  BIT_VECTOR ConfigMask,
103  inT16 NumFeatures,
104  const INT_FEATURE_STRUCT* Features,
105  tesseract::UnicharRating* Result,
106  int AdaptFeatureThreshold,
107  int Debug,
108  bool SeparateDebugWindows);
109 
110  // Applies the CN normalization factor to the given rating and returns
111  // the modified rating.
112  float ApplyCNCorrection(float rating, int blob_length,
113  int normalization_factor, int matcher_multiplier);
114 
115  int FindGoodProtos(INT_CLASS ClassTemplate,
116  BIT_VECTOR ProtoMask,
117  BIT_VECTOR ConfigMask,
118  uinT16 BlobLength,
119  inT16 NumFeatures,
120  INT_FEATURE_ARRAY Features,
121  PROTO_ID *ProtoArray,
122  int AdaptProtoThreshold,
123  int Debug);
124 
125  int FindBadFeatures(INT_CLASS ClassTemplate,
126  BIT_VECTOR ProtoMask,
127  BIT_VECTOR ConfigMask,
128  uinT16 BlobLength,
129  inT16 NumFeatures,
130  INT_FEATURE_ARRAY Features,
131  FEATURE_ID *FeatureArray,
132  int AdaptFeatureThreshold,
133  int Debug);
134 
135  private:
136  int UpdateTablesForFeature(
137  INT_CLASS ClassTemplate,
138  BIT_VECTOR ProtoMask,
139  BIT_VECTOR ConfigMask,
140  int FeatureNum,
141  const INT_FEATURE_STRUCT* Feature,
142  ScratchEvidence *evidence,
143  int Debug);
144 
145  int FindBestMatch(INT_CLASS ClassTemplate,
146  const ScratchEvidence &tables,
147  tesseract::UnicharRating* Result);
148 
149 #ifndef GRAPHICS_DISABLED  // the three helpers below are declared only when GRAPHICS_DISABLED is not defined
150  void DebugFeatureProtoError(
151  INT_CLASS ClassTemplate,
152  BIT_VECTOR ProtoMask,
153  BIT_VECTOR ConfigMask,
154  const ScratchEvidence &tables,
155  inT16 NumFeatures,
156  int Debug);
157 
158  void DisplayProtoDebugInfo(
159  INT_CLASS ClassTemplate,
160  BIT_VECTOR ProtoMask,
161  BIT_VECTOR ConfigMask,
162  const ScratchEvidence &tables,
163  bool SeparateDebugWindows);
164 
165  void DisplayFeatureDebugInfo(
166  INT_CLASS ClassTemplate,
167  BIT_VECTOR ProtoMask,
168  BIT_VECTOR ConfigMask,
169  inT16 NumFeatures,
170  const INT_FEATURE_STRUCT* Features,
171  int AdaptFeatureThreshold,
172  int Debug,
173  bool SeparateDebugWindows);
174 #endif  // GRAPHICS_DISABLED
175 
176 
177  private:  // access is already private from the label above; this one starts the data members
178  uinT8 similarity_evidence_table_[SE_TABLE_SIZE];  // SE_TABLE_SIZE (512) one-byte entries
179  uinT32 evidence_table_mask_;
180  uinT32 mult_trunc_shift_bits_;
181  uinT32 table_trunc_shift_bits_;
182  tesseract::IntParam *classify_debug_level_;  // null after construction; presumably assigned by Init() -- confirm in intmatcher.cpp
183  uinT32 evidence_mult_mask_;
184 };
185 
189 void IMDebugConfiguration(INT_FEATURE FeatureNum,
190  uinT16 ActualProtoNum,
191  uinT8 Evidence,
192  BIT_VECTOR ConfigMask,
193  uinT32 ConfigWord);
194 
195 void IMDebugConfigurationSum(INT_FEATURE FeatureNum,
196  uinT8 *FeatureEvidence,
197  inT32 ConfigCount);
198 
199 void HeapSort(int n, int ra[], int rb[]);  // 'register' removed from ra/rb: ill-formed since C++17 and never part of the function's type, so existing definitions and callers still match
200 
204 #endif
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:557
#define MAX_PROTO_INDEX
Definition: intproto.h:43
uinT8 feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:70
static const int kEvidenceTableBits
Definition: intmatcher.h:88
short inT16
Definition: host.h:33
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
int classify_adapt_proto_thresh
FLOAT32 Rating
Definition: intmatcher.h:48
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:679
void UpdateSumOfProtoEvidences(INT_CLASS ClassTemplate, BIT_VECTOR ConfigMask, inT16 NumFeatures)
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
#define INT_VAR_H(name, val, comment)
Definition: params.h:265
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
inT16 PROTO_ID
Definition: matchdefs.h:41
int sum_feature_evidence_[MAX_NUM_CONFIGS]
Definition: intmatcher.h:71
bool disable_character_fragments
unsigned char uinT8
Definition: host.h:32
CLASS_ID Class
Definition: intmatcher.h:49
int classify_integer_matcher_multiplier
#define MAX_NUM_PROTOS
Definition: intproto.h:47
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
void HeapSort(int n, register int ra[], register int rb[])
void IMDebugConfigurationSum(INT_FEATURE FeatureNum, uinT8 *FeatureEvidence, inT32 ConfigCount)
void Clear(const INT_CLASS class_template)
Definition: intmatcher.cpp:709
void IMDebugConfiguration(INT_FEATURE FeatureNum, uinT16 ActualProtoNum, uinT8 Evidence, BIT_VECTOR ConfigMask, uinT32 ConfigWord)
int classify_adapt_feature_thresh
#define BOOL_VAR_H(name, val, comment)
Definition: params.h:268
unsigned short uinT16
Definition: host.h:34
static const float kSimilarityCenter
Definition: intmatcher.h:94
static const int kIntEvidenceTruncBits
Definition: intmatcher.h:90
uinT8 proto_evidence_[MAX_NUM_PROTOS][MAX_PROTO_INDEX]
Definition: intmatcher.h:72
#define FALSE
Definition: capi.h:46
uinT8 FEATURE_ID
Definition: matchdefs.h:47
float FLOAT32
Definition: host.h:44
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
static const int kIntThetaFudge
Definition: intmatcher.h:86
int inT32
Definition: host.h:35
void ClearFeatureEvidence(const INT_CLASS class_template)
Definition: intmatcher.cpp:716
void NormalizeSums(INT_CLASS ClassTemplate, inT16 NumFeatures, inT32 used_features)
static const float kSEExponentialMultiplier
Definition: intmatcher.h:92
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:627
unsigned int uinT32
Definition: host.h:36
#define SE_TABLE_SIZE
Definition: intmatcher.h:67
#define MAX_NUM_CONFIGS
Definition: intproto.h:46