|
| ClassPruner (int max_classes) |
|
| ~ClassPruner () |
|
void | ComputeScores (const INT_TEMPLATES_STRUCT *int_templates, int num_features, const INT_FEATURE_STRUCT *features) |
|
void | AdjustForExpectedNumFeatures (const uinT16 *expected_num_features, int cutoff_strength) |
|
void | DisableDisabledClasses (const UNICHARSET &unicharset) |
|
void | DisableFragments (const UNICHARSET &unicharset) |
|
void | NormalizeForXheight (int norm_multiplier, const uinT8 *normalization_factors) |
|
void | NoNormalization () |
|
void | PruneAndSort (int pruning_factor, int keep_this, bool max_of_non_fragments, const UNICHARSET &unicharset) |
|
void | DebugMatch (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const INT_FEATURE_STRUCT *features) const |
|
void | SummarizeResult (const Classify &classify, const INT_TEMPLATES_STRUCT *int_templates, const uinT16 *expected_num_features, int norm_multiplier, const uinT8 *normalization_factors) const |
|
int | SetupResults (GenericVector< CP_RESULT_STRUCT > *results) const |
|
Definition at line 107 of file intmatcher.cpp.
◆ ClassPruner()
tesseract::ClassPruner::ClassPruner |
( |
int |
max_classes | ) |
|
|
inline |
Definition at line 109 of file intmatcher.cpp.
116 max_classes_ = max_classes;
119 class_count_ =
new int[rounded_classes_];
120 norm_count_ =
new int[rounded_classes_];
121 sort_key_ =
new int[rounded_classes_ + 1];
122 sort_index_ =
new int[rounded_classes_ + 1];
123 for (
int i = 0; i < rounded_classes_; i++) {
126 pruning_threshold_ = 0;
#define WERDS_PER_CP_VECTOR
#define NUM_BITS_PER_CLASS
int RoundUp(int n, int block_size)
◆ ~ClassPruner()
tesseract::ClassPruner::~ClassPruner |
( |
| ) |
|
|
inline |
Definition at line 131 of file intmatcher.cpp.
132 delete []class_count_;
133 delete []norm_count_;
135 delete []sort_index_;
◆ AdjustForExpectedNumFeatures()
void tesseract::ClassPruner::AdjustForExpectedNumFeatures |
( |
const uinT16 * |
expected_num_features, |
|
|
int |
cutoff_strength |
|
) |
| |
|
inline |
Adjusts the scores according to the number of expected features. Used in lieu of a constant bias, this penalizes classes that expect more features than are actually present. Thus an actual 'c' will score higher for 'c' than for 'e', even though almost all the features match 'e' as well as 'c', because 'e' expects more features to be present.
Definition at line 211 of file intmatcher.cpp.
213 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
214 if (num_features_ < expected_num_features[class_id]) {
215 int deficit = expected_num_features[class_id] - num_features_;
216 class_count_[class_id] -= class_count_[class_id] * deficit /
217 (num_features_ * cutoff_strength + deficit);
◆ ComputeScores()
Computes the scores for every class in the character set by summing the weights for each feature, and stores the sums internally in class_count_.
Definition at line 140 of file intmatcher.cpp.
142 num_features_ = num_features;
144 for (
int f = 0; f < num_features; ++f) {
153 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
156 const uinT32* pruner_word_ptr =
159 uinT32 pruner_word = *pruner_word_ptr++;
#define CLASS_PRUNER_CLASS_MASK
#define WERDS_PER_CP_VECTOR
#define NUM_BITS_PER_CLASS
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
◆ DebugMatch()
Prints debug info on the class pruner matches for the pruned classes only.
Definition at line 300 of file intmatcher.cpp.
304 int max_num_classes = int_templates->
NumClasses;
305 for (
int f = 0; f < num_features_; ++f) {
307 tprintf(
"F=%3d(%d,%d,%d),", f, feature->
X, feature->
Y, feature->
Theta);
313 for (
int pruner_set = 0; pruner_set < num_pruners; ++pruner_set) {
316 const uinT32* pruner_word_ptr =
319 uinT32 pruner_word = *pruner_word_ptr++;
320 for (
int word_class = 0; word_class < 16 &&
321 class_id < max_num_classes; ++word_class, ++class_id) {
322 if (norm_count_[class_id] >= pruning_threshold_) {
324 classify.ClassIDToDebugStr(int_templates,
325 class_id, 0).string(),
#define CLASS_PRUNER_CLASS_MASK
#define WERDS_PER_CP_VECTOR
#define NUM_BITS_PER_CLASS
uinT32 p[NUM_CP_BUCKETS][NUM_CP_BUCKETS][NUM_CP_BUCKETS][WERDS_PER_CP_VECTOR]
CLASS_PRUNER_STRUCT * ClassPruners[MAX_NUM_CLASS_PRUNERS]
◆ DisableDisabledClasses()
void tesseract::ClassPruner::DisableDisabledClasses |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores for classes disabled in the unicharset. Implements the black-list to recognize a subset of the character set.
Definition at line 224 of file intmatcher.cpp.
225 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
227 class_count_[class_id] = 0;
bool get_enabled(UNICHAR_ID unichar_id) const
◆ DisableFragments()
void tesseract::ClassPruner::DisableFragments |
( |
const UNICHARSET & |
unicharset | ) |
|
|
inline |
Zeros the scores of fragments.
Definition at line 232 of file intmatcher.cpp.
233 for (
int class_id = 0; class_id < max_classes_; ++class_id) {
237 class_count_[class_id] = 0;
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
◆ NoNormalization()
void tesseract::ClassPruner::NoNormalization |
( |
| ) |
|
|
inline |
The no-op normalization copies the class_count_ array to norm_count_ unchanged.
Definition at line 255 of file intmatcher.cpp.
256 for (
int class_id = 0; class_id < max_classes_; class_id++) {
257 norm_count_[class_id] = class_count_[class_id];
◆ NormalizeForXheight()
void tesseract::ClassPruner::NormalizeForXheight |
( |
int |
norm_multiplier, |
|
|
const uinT8 * |
normalization_factors |
|
) |
| |
|
inline |
Normalizes the counts for xheight, putting the normalized result in norm_count_. Applies a simple subtractive penalty for incorrect vertical position provided by the normalization_factors array, indexed by character class, and scaled by the norm_multiplier.
Definition at line 246 of file intmatcher.cpp.
248 for (
int class_id = 0; class_id < max_classes_; class_id++) {
249 norm_count_[class_id] = class_count_[class_id] -
250 ((norm_multiplier * normalization_factors[class_id]) >> 8);
◆ PruneAndSort()
void tesseract::ClassPruner::PruneAndSort |
( |
int |
pruning_factor, |
|
|
int |
keep_this, |
|
|
bool |
max_of_non_fragments, |
|
|
const UNICHARSET & |
unicharset |
|
) |
| |
|
inline |
Prunes the classes using (maximum count * pruning_factor) / 256 as the threshold for keeping classes. If max_of_non_fragments is true, fragments are ignored when computing the maximum count.
Definition at line 264 of file intmatcher.cpp.
267 for (
int c = 0; c < max_classes_; ++c) {
268 if (norm_count_[c] > max_count &&
274 (!max_of_non_fragments || !unicharset.
get_fragment(c))) {
275 max_count = norm_count_[c];
279 pruning_threshold_ = (max_count * pruning_factor) >> 8;
281 if (pruning_threshold_ < 1)
282 pruning_threshold_ = 1;
284 for (
int class_id = 0; class_id < max_classes_; class_id++) {
285 if (norm_count_[class_id] >= pruning_threshold_ ||
286 class_id == keep_this) {
288 sort_index_[num_classes_] = class_id;
289 sort_key_[num_classes_] = norm_count_[class_id];
294 if (num_classes_ > 1)
295 HeapSort(num_classes_, sort_key_, sort_index_);
void HeapSort(int n, register int ra[], register int rb[])
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
◆ SetupResults()
Copies the pruned, sorted classes into the output results and returns the number of classes.
Definition at line 360 of file intmatcher.cpp.
363 for (
int c = 0; c < num_classes_; ++c) {
364 (*results)[c].Class = sort_index_[num_classes_ - c];
365 (*results)[c].Rating = 1.0 - sort_key_[num_classes_ - c] /
#define CLASS_PRUNER_CLASS_MASK
void init_to_size(int size, T t)
◆ SummarizeResult()
void tesseract::ClassPruner::SummarizeResult |
( |
const Classify & |
classify, |
|
|
const INT_TEMPLATES_STRUCT * |
int_templates, |
|
|
const uinT16 * |
expected_num_features, |
|
|
int |
norm_multiplier, |
|
|
const uinT8 * |
normalization_factors |
|
) |
| const |
|
inline |
Prints a summary of the pruner result.
Definition at line 337 of file intmatcher.cpp.
342 tprintf(
"CP:%d classes, %d features:\n", num_classes_, num_features_);
343 for (
int i = 0; i < num_classes_; ++i) {
344 int class_id = sort_index_[num_classes_ - i];
345 STRING class_string = classify.ClassIDToDebugStr(int_templates,
347 tprintf(
"%s:Initial=%d, E=%d, Xht-adj=%d, N=%d, Rat=%.2f\n",
349 class_count_[class_id],
350 expected_num_features[class_id],
351 (norm_multiplier * normalization_factors[class_id]) >> 8,
352 sort_key_[num_classes_ - i],
353 100.0 - 100.0 * sort_key_[num_classes_ - i] /
#define CLASS_PRUNER_CLASS_MASK
const char * string() const
The documentation for this class was generated from the following file: