tesseract  3.05.02
adaptmatch.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptmatch.c
3  ** Purpose: High level adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Mon Mar 11 10:00:10 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*-----------------------------------------------------------------------------
20  Include Files and Type Defines
21 -----------------------------------------------------------------------------*/
22 #ifdef HAVE_CONFIG_H
23 #include "config_auto.h"
24 #endif
25 
26 #include <ctype.h>
27 #include "shapeclassifier.h"
28 #include "ambigs.h"
29 #include "blobclass.h"
30 #include "blobs.h"
31 #include "callcpp.h"
32 #include "classify.h"
33 #include "const.h"
34 #include "dict.h"
35 #include "efio.h"
36 #include "emalloc.h"
37 #include "featdefs.h"
38 #include "float2int.h"
39 #include "genericvector.h"
40 #include "globals.h"
41 #include "helpers.h"
42 #include "intfx.h"
43 #include "intproto.h"
44 #include "mfoutline.h"
45 #include "ndminx.h"
46 #include "normfeat.h"
47 #include "normmatch.h"
48 #include "outfeat.h"
49 #include "pageres.h"
50 #include "params.h"
51 #include "picofeat.h"
52 #include "shapetable.h"
53 #include "tessclassifier.h"
54 #include "trainingsample.h"
55 #include "unicharset.h"
56 #include "werd.h"
57 
58 #include <stdio.h>
59 #include <string.h>
60 #include <stdlib.h>
61 #include <math.h>
62 #ifdef __UNIX__
63 #include <assert.h>
64 #endif
65 
66 #define ADAPT_TEMPLATE_SUFFIX ".a"
67 
68 #define MAX_MATCHES 10
69 #define UNLIKELY_NUM_FEAT 200
70 #define NO_DEBUG 0
71 #define MAX_ADAPTABLE_WERD_SIZE 40
72 
73 #define ADAPTABLE_WERD_ADJUSTMENT (0.05)
74 
75 #define Y_DIM_OFFSET (Y_SHIFT - BASELINE_Y_SHIFT)
76 
77 #define WORST_POSSIBLE_RATING (0.0f)
78 
81 
// Holds the candidate matches collected for one blob together with a cached
// summary (unichar id, index and rating) of the best-rated match.
// NOTE(review): this listing omits several source lines of the struct (the
// member declarations among them), so only the two methods below are visible.
82 struct ADAPT_RESULTS {
90 
 // Clears the HasNonfragment flag and then recomputes the best-match summary.
93  inline void Initialize() {
95  HasNonfragment = false;
96  ComputeBest();
97  }
98  // Computes best_unichar_id, best_match_index and best_rating.
 // Only an entry whose rating is strictly greater than the running
 // best_rating replaces it, so of two equally rated entries the earlier wins.
 // NOTE(review): source line 102 is absent from this listing; presumably it
 // resets best_rating before the scan - confirm against the original file.
99  void ComputeBest() {
100  best_unichar_id = INVALID_UNICHAR_ID;
101  best_match_index = -1;
103  for (int i = 0; i < match.size(); ++i) {
104  if (match[i].rating > best_rating) {
105  best_rating = match[i].rating;
106  best_unichar_id = match[i].unichar_id;
107  best_match_index = i;
108  }
109  }
110  }
111 };
112 
// Small key record. Only the ConfigId member is visible here; source lines
// 114-115 are absent from this listing, so any other members are not shown.
113 struct PROTO_KEY {
116  int ConfigId;
117 };
118 
119 /*-----------------------------------------------------------------------------
120  Private Macros
121 -----------------------------------------------------------------------------*/
// Reports whether a match is merely marginal: true exactly when the gap
// between confidence and a perfect score of 1.0 is strictly larger than
// matcher_great_threshold.
inline bool MarginalMatch(float confidence, float matcher_great_threshold) {
  const float shortfall = 1.0f - confidence;
  return matcher_great_threshold < shortfall;
}
125 
126 /*-----------------------------------------------------------------------------
127  Private Function Prototypes
128 -----------------------------------------------------------------------------*/
129 // Returns the index of the given id in results, if present, or the size of the
130 // vector (index it will go at) if not present.
131 static int FindScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) {
132  for (int i = 0; i < results.match.size(); i++) {
133  if (results.match[i].unichar_id == id)
134  return i;
135  }
136  return results.match.size();
137 }
138 
139 // Returns the current rating for a unichar id if we have rated it, defaulting
140 // to WORST_POSSIBLE_RATING.
141 static float ScoredUnichar(UNICHAR_ID id, const ADAPT_RESULTS& results) {
142  int index = FindScoredUnichar(id, results);
143  if (index >= results.match.size()) return WORST_POSSIBLE_RATING;
144  return results.match[index].rating;
145 }
146 
147 void InitMatcherRatings(register FLOAT32 *Rating);
148 
149 int MakeTempProtoPerm(void *item1, void *item2);
150 
151 void SetAdaptiveThreshold(FLOAT32 Threshold);
152 
153 
154 /*-----------------------------------------------------------------------------
155  Public Code
156 -----------------------------------------------------------------------------*/
157 /*---------------------------------------------------------------------------*/
158 namespace tesseract {
// Classifies Blob with the adaptive matcher and converts the surviving
// matches into blob choices stored in Choices.
// Choices must be non-NULL and AdaptedTemplates must already exist; both are
// asserted below.
// NOTE(review): source lines 195 and 211 are absent from this listing, so the
// statements that originally sat there are not shown.
185 void Classify::AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices) {
186  assert(Choices != NULL);
 // Scratch results object: allocated here and deleted before returning.
187  ADAPT_RESULTS *Results = new ADAPT_RESULTS;
188  Results->Initialize();
189 
190  ASSERT_HOST(AdaptedTemplates != NULL);
191 
192  DoAdaptiveMatch(Blob, Results);
193 
 // Prune the raw matches, then refresh the cached best match before the
 // results are converted into choices.
194  RemoveBadMatches(Results);
196  RemoveExtraPuncs(Results);
197  Results->ComputeBest();
198  ConvertMatchesToChoices(Blob->denorm(), Blob->bounding_box(), Results,
199  Choices);
200 
201  // TODO(rays) Move to before ConvertMatchesToChoices!
 // A speckle choice is added when the blob is a large speckle or when the
 // conversion above produced no choices at all.
202  if (LargeSpeckle(*Blob) || Choices->length() == 0)
203  AddLargeSpeckleTo(Results->BlobLength, Choices);
204 
205  if (matcher_debug_level >= 1) {
206  tprintf("AD Matches = ");
207  PrintAdaptiveMatchResults(*Results);
208  }
209 
210 #ifndef GRAPHICS_DISABLED
 // NOTE(review): line 211 (not shown) presumably guards this debugger call -
 // confirm against the original file.
212  DebugAdaptiveClassifier(Blob, Results);
213 #endif
214 
215  delete Results;
216 } /* AdaptiveClassifier */
217 
218 // If *win is NULL, sets it to a new ScrollView() object with title msg.
219 // Clears the window and draws baselines.
220 void Classify::RefreshDebugWindow(ScrollView **win, const char *msg,
221  int y_offset, const TBOX &wbox) {
222  #ifndef GRAPHICS_DISABLED
223  const int kSampleSpaceWidth = 500;
224  if (*win == NULL) {
225  *win = new ScrollView(msg, 100, y_offset, kSampleSpaceWidth * 2, 200,
226  kSampleSpaceWidth * 2, 200, true);
227  }
228  (*win)->Clear();
229  (*win)->Pen(64, 64, 64);
230  (*win)->Line(-kSampleSpaceWidth, kBlnBaselineOffset,
231  kSampleSpaceWidth, kBlnBaselineOffset);
232  (*win)->Line(-kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset,
233  kSampleSpaceWidth, kBlnXHeight + kBlnBaselineOffset);
234  (*win)->ZoomToRectangle(wbox.left(), wbox.top(),
235  wbox.right(), wbox.bottom());
236  #endif // GRAPHICS_DISABLED
237 }
238 
239 // Learns the given word using its chopped_word, seam_array, denorm,
240 // box_word, best_state, and correct_text to learn both correctly and
241 // incorrectly segmented blobs. If fontname is not NULL, then LearnBlob
242 // is called and the data will be saved in an internal buffer.
243 // Otherwise AdaptToBlob is called for adaption within a document.
// Walks the characters of word->correct_text and hands each one, and where
// applicable each of its fragments, to LearnPieces.
// fontname == NULL selects adaption mode; otherwise the word is used for
// training and no per-character thresholds are allocated.
// NOTE(review): source lines 254, 258-260, 266, 275, 280 and 297 are absent
// from this listing, so several conditions and calls inside this function are
// not visible here.
244 void Classify::LearnWord(const char* fontname, WERD_RES* word) {
245  int word_len = word->correct_text.size();
 // Nothing to learn from when there is no correct text.
246  if (word_len == 0) return;
247 
 // One threshold per character; stays NULL unless adaption mode allocates it.
248  float* thresholds = NULL;
249  if (fontname == NULL) {
250  // Adaption mode.
251  if (!EnableLearning || word->best_choice == NULL)
252  return; // Can't or won't adapt.
253 
255  tprintf("\n\nAdapting to word = %s\n",
256  word->best_choice->debug_string().string());
257  thresholds = new float[word_len];
261  matcher_rating_margin, thresholds);
262  }
 // Index into chopped_word->blobs of the first blob of the current character.
263  int start_blob = 0;
264 
265  #ifndef GRAPHICS_DISABLED
267  if (learn_fragmented_word_debug_win_ != NULL) {
268  window_wait(learn_fragmented_word_debug_win_);
269  }
270  RefreshDebugWindow(&learn_fragments_debug_win_, "LearnPieces", 400,
271  word->chopped_word->bounding_box());
272  RefreshDebugWindow(&learn_fragmented_word_debug_win_, "LearnWord", 200,
273  word->chopped_word->bounding_box());
274  word->chopped_word->plot(learn_fragmented_word_debug_win_);
276  }
277  #endif // GRAPHICS_DISABLED
278 
 // best_state[ch] is passed to LearnPieces as the number of blobs making up
 // character ch, and start_blob advances by it after every character.
279  for (int ch = 0; ch < word_len; ++ch) {
281  tprintf("\nLearning %s\n", word->correct_text[ch].string());
282  }
 // Characters with empty correct text are not learned, but their blobs are
 // still stepped over at the bottom of the loop.
283  if (word->correct_text[ch].length() > 0) {
 // Without a thresholds array the threshold defaults to 0.
284  float threshold = thresholds != NULL ? thresholds[ch] : 0.0f;
285 
 // Learn the character as a whole first.
286  LearnPieces(fontname, start_blob, word->best_state[ch], threshold,
287  CST_WHOLE, word->correct_text[ch].string(), word);
288 
289  if (word->best_state[ch] > 1 && !disable_character_fragments) {
290  // Check that the character breaks into meaningful fragments
291  // that each match a whole character with at least
292  // classify_character_fragments_garbage_certainty_threshold
293  bool garbage = false;
294  int frag;
295  for (frag = 0; frag < word->best_state[ch]; ++frag) {
296  TBLOB* frag_blob = word->chopped_word->blobs[start_blob + frag];
298  garbage |= LooksLikeGarbage(frag_blob);
299  }
300  }
301  // Learn the fragments.
302  if (!garbage) {
303  bool pieces_all_natural = word->PiecesAllNatural(start_blob,
304  word->best_state[ch]);
305  if (pieces_all_natural || !prioritize_division) {
306  for (frag = 0; frag < word->best_state[ch]; ++frag) {
 // Split the correct text on spaces, replace the first token with its
 // fragment form for this piece, and re-join the tokens with single spaces.
307  GenericVector<STRING> tokens;
308  word->correct_text[ch].split(' ', &tokens);
309 
310  tokens[0] = CHAR_FRAGMENT::to_string(
311  tokens[0].string(), frag, word->best_state[ch],
312  pieces_all_natural);
313 
314  STRING full_string;
315  for (int i = 0; i < tokens.size(); i++) {
316  full_string += tokens[i];
317  if (i != tokens.size() - 1)
318  full_string += ' ';
319  }
320  LearnPieces(fontname, start_blob + frag, 1, threshold,
321  CST_FRAGMENT, full_string.string(), word);
322  }
323  }
324  }
325  }
326 
327  // TODO(rays): re-enable this part of the code when we switch to the
328  // new classifier that needs to see examples of garbage.
329  /*
330  if (word->best_state[ch] > 1) {
331  // If the next blob is good, make junk with the rightmost fragment.
332  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
333  LearnPieces(fontname, start_blob + word->best_state[ch] - 1,
334  word->best_state[ch + 1] + 1,
335  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
336  }
337  // If the previous blob is good, make junk with the leftmost fragment.
338  if (ch > 0 && word->correct_text[ch - 1].length() > 0) {
339  LearnPieces(fontname, start_blob - word->best_state[ch - 1],
340  word->best_state[ch - 1] + 1,
341  threshold, CST_IMPROPER, INVALID_UNICHAR, word);
342  }
343  }
344  // If the next blob is good, make a join with it.
345  if (ch + 1 < word_len && word->correct_text[ch + 1].length() > 0) {
346  STRING joined_text = word->correct_text[ch];
347  joined_text += word->correct_text[ch + 1];
348  LearnPieces(fontname, start_blob,
349  word->best_state[ch] + word->best_state[ch + 1],
350  threshold, CST_NGRAM, joined_text.string(), word);
351  }
352  */
353  }
354  start_blob += word->best_state[ch];
355  }
 // thresholds is still NULL in training mode; delete [] on NULL is a no-op.
356  delete [] thresholds;
357 } // LearnWord.
358 
359 // Builds a blob of length fragments, from the word, starting at start,
360 // and then learns it, as having the given correct_text.
361 // If fontname is not NULL, then LearnBlob is called and the data will be
362 // saved in an internal buffer for static training.
363 // Otherwise AdaptToBlob is called for adaption within a document.
364 // threshold is a magic number required by AdaptToChar and generated by
365 // ComputeAdaptionThresholds.
366 // Although it can be partly inferred from the string, segmentation is
367 // provided to explicitly clarify the character segmentation.
// Learns the blob formed from `length` pieces of word->chopped_word starting
// at index `start`, labelled with correct_text.
// With a non-NULL fontname the blob goes to LearnBlob; otherwise, if
// correct_text is in the unicharset, AdaptToChar is called on the adapted
// templates (and on the backup templates when they exist).
// NOTE(review): source lines 399, 410, 418 and 428 are absent from this
// listing, so a few statements and call heads are not visible here.
368 void Classify::LearnPieces(const char* fontname, int start, int length,
369  float threshold, CharSegmentationType segmentation,
370  const char* correct_text, WERD_RES* word) {
371  // TODO(daria) Remove/modify this if/when we want
372  // to train and/or adapt to n-grams.
 // Only CST_WHOLE, and CST_FRAGMENT while fragments are enabled, get past
 // this guard; every other segmentation type returns immediately.
373  if (segmentation != CST_WHOLE &&
374  (segmentation != CST_FRAGMENT || disable_character_fragments))
375  return;
376 
 // Join the pieces so that blobs[start] covers the requested range; the
 // matching BreakPieces call is at the end of the function.
377  if (length > 1) {
378  SEAM::JoinPieces(word->seam_array, word->chopped_word->blobs, start,
379  start + length - 1);
380  }
381  TBLOB* blob = word->chopped_word->blobs[start];
382  // Rotate the blob if needed for classification.
 // A NULL result means the original blob is used as-is; a non-NULL result
 // is a separate object that is deleted near the end of this function.
383  TBLOB* rotated_blob = blob->ClassifyNormalizeIfNeeded();
384  if (rotated_blob == NULL)
385  rotated_blob = blob;
386 
387  #ifndef GRAPHICS_DISABLED
388  // Draw debug windows showing the blob that is being learned if needed.
389  if (strcmp(classify_learn_debug_str.string(), correct_text) == 0) {
390  RefreshDebugWindow(&learn_debug_win_, "LearnPieces", 600,
391  word->chopped_word->bounding_box());
392  rotated_blob->plot(learn_debug_win_, ScrollView::GREEN, ScrollView::BROWN);
393  learn_debug_win_->Update();
394  window_wait(learn_debug_win_);
395  }
396  if (classify_debug_character_fragments && segmentation == CST_FRAGMENT) {
397  ASSERT_HOST(learn_fragments_debug_win_ != NULL); // set up in LearnWord
398  blob->plot(learn_fragments_debug_win_,
400  learn_fragments_debug_win_->Update();
401  }
402  #endif // GRAPHICS_DISABLED
403 
 // Training path: normalisation and matching parameters are forced before
 // the blob is passed to LearnBlob.
404  if (fontname != NULL) {
405  classify_norm_method.set_value(character); // force char norm spc 30/11/93
406  tess_bn_matching.set_value(false); // turn it off
407  tess_cn_matching.set_value(false);
408  DENORM bl_denorm, cn_denorm;
409  INT_FX_RESULT_STRUCT fx_info;
411  &bl_denorm, &cn_denorm, &fx_info);
412  LearnBlob(fontname, rotated_blob, cn_denorm, fx_info, correct_text);
413  } else if (unicharset.contains_unichar(correct_text)) {
 // Adaption path (fontname is NULL here). The font id falls back to 0 when
 // the word carries no fontinfo.
414  UNICHAR_ID class_id = unicharset.unichar_to_id(correct_text);
415  int font_id = word->fontinfo != NULL
416  ? fontinfo_table_.get_id(*word->fontinfo)
417  : 0;
419  tprintf("Adapting to char = %s, thr= %g font_id= %d\n",
420  unicharset.id_to_unichar(class_id), threshold, font_id);
 // NOTE(review): the next comment says "filename is not NULL", but this
 // branch is only reached when fontname is NULL - confirm what it refers to.
421  // If filename is not NULL we are doing recognition
422  // (as opposed to training), so we must have already set word fonts.
423  AdaptToChar(rotated_blob, class_id, font_id, threshold, AdaptedTemplates);
424  if (BackupAdaptedTemplates != NULL) {
425  // Adapt the backup templates too. They will be used if the primary gets
426  // too full.
427  AdaptToChar(rotated_blob, class_id, font_id, threshold,
429  }
430  } else if (classify_debug_level >= 1) {
431  tprintf("Can't adapt to %s not in unicharset\n", correct_text);
432  }
 // Delete the rotated copy only when it is distinct from the word's own blob.
433  if (rotated_blob != blob) {
434  delete rotated_blob;
435  }
436 
 // Called for the same range as JoinPieces above, including when length is 1.
437  SEAM::BreakPieces(word->seam_array, word->chopped_word->blobs, start,
438  start + length - 1);
439 } // LearnPieces.
440 
441 /*---------------------------------------------------------------------------*/
457  STRING Filename;
458  FILE *File;
459 
460  if (AdaptedTemplates != NULL &&
462  Filename = imagefile + ADAPT_TEMPLATE_SUFFIX;
463  File = fopen (Filename.string(), "wb");
464  if (File == NULL)
465  cprintf ("Unable to save adapted templates to %s!\n", Filename.string());
466  else {
467  cprintf ("\nSaving adapted templates to %s ...", Filename.string());
468  fflush(stdout);
470  cprintf ("\n");
471  fclose(File);
472  }
473  }
474 
475  if (AdaptedTemplates != NULL) {
477  AdaptedTemplates = NULL;
478  }
479  if (BackupAdaptedTemplates != NULL) {
481  BackupAdaptedTemplates = NULL;
482  }
483 
484  if (PreTrainedTemplates != NULL) {
486  PreTrainedTemplates = NULL;
487  }
489  FreeNormProtos();
490  if (AllProtosOn != NULL) {
495  AllProtosOn = NULL;
496  AllConfigsOn = NULL;
497  AllConfigsOff = NULL;
498  TempProtoMask = NULL;
499  }
500  delete shape_table_;
501  shape_table_ = NULL;
502  if (static_classifier_ != NULL) {
503  delete static_classifier_;
504  static_classifier_ = NULL;
505  }
506 } /* EndAdaptiveClassifier */
507 
508 
509 /*---------------------------------------------------------------------------*/
527 void Classify::InitAdaptiveClassifier(bool load_pre_trained_templates) {
529  return;
530  if (AllProtosOn != NULL)
531  EndAdaptiveClassifier(); // Don't leak with multiple inits.
532 
533  // If there is no language_data_path_prefix, the classifier will be
534  // adaptive only.
535  if (language_data_path_prefix.length() > 0 &&
536  load_pre_trained_templates) {
540  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded inttemp\n");
541 
546  tprintf("Error loading shape table!\n");
547  delete shape_table_;
548  shape_table_ = NULL;
549  } else if (tessdata_manager.DebugLevel() > 0) {
550  tprintf("Successfully loaded shape table!\n");
551  }
552  }
553 
558  CharNormCutoffs);
559  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded pffmtable\n");
560 
562  NormProtos =
565  if (tessdata_manager.DebugLevel() > 0) tprintf("Loaded normproto\n");
566  static_classifier_ = new TessClassifier(false, this);
567  }
568 
570  InitIntegerFX();
571 
579 
580  for (int i = 0; i < MAX_NUM_CLASSES; i++) {
581  BaselineCutoffs[i] = 0;
582  }
583 
585  FILE *File;
586  STRING Filename;
587 
588  Filename = imagefile;
589  Filename += ADAPT_TEMPLATE_SUFFIX;
590  File = fopen(Filename.string(), "rb");
591  if (File == NULL) {
593  } else {
594  cprintf("\nReading pre-adapted templates from %s ...\n",
595  Filename.string());
596  fflush(stdout);
598  cprintf("\n");
599  fclose(File);
601 
602  for (int i = 0; i < AdaptedTemplates->Templates->NumClasses; i++) {
603  BaselineCutoffs[i] = CharNormCutoffs[i];
604  }
605  }
606  } else {
607  if (AdaptedTemplates != NULL)
610  }
611 } /* InitAdaptiveClassifier */
612 
615  tprintf("Resetting adaptive classifier (NumAdaptationsFailed=%d)\n",
616  NumAdaptationsFailed);
617  }
620  if (BackupAdaptedTemplates != NULL)
622  BackupAdaptedTemplates = NULL;
623  NumAdaptationsFailed = 0;
624 }
625 
626 // If there are backup adapted templates, switches to those, otherwise resets
627 // the main adaptive classifier (because it is full.)
629  if (BackupAdaptedTemplates == NULL) {
631  return;
632  }
634  tprintf("Switch to backup adaptive classifier (NumAdaptationsFailed=%d)\n",
635  NumAdaptationsFailed);
636  }
639  BackupAdaptedTemplates = NULL;
640  NumAdaptationsFailed = 0;
641 }
642 
643 // Resets the backup adaptive classifier to empty.
645  if (BackupAdaptedTemplates != NULL)
648 }
649 
650 /*---------------------------------------------------------------------------*/
672 
674 
675 } /* SettupPass1 */
676 
677 
678 /*---------------------------------------------------------------------------*/
693 
694 } /* SettupPass2 */
695 
696 
697 /*---------------------------------------------------------------------------*/
718  CLASS_ID ClassId,
719  int FontinfoId,
720  ADAPT_CLASS Class,
721  ADAPT_TEMPLATES Templates) {
722  FEATURE_SET Features;
723  int Fid, Pid;
724  FEATURE Feature;
725  int NumFeatures;
726  TEMP_PROTO TempProto;
727  PROTO Proto;
728  INT_CLASS IClass;
730 
731  classify_norm_method.set_value(baseline);
732  Features = ExtractOutlineFeatures(Blob);
733  NumFeatures = Features->NumFeatures;
734  if (NumFeatures > UNLIKELY_NUM_FEAT || NumFeatures <= 0) {
735  FreeFeatureSet(Features);
736  return;
737  }
738 
739  Config = NewTempConfig(NumFeatures - 1, FontinfoId);
740  TempConfigFor(Class, 0) = Config;
741 
742  /* this is a kludge to construct cutoffs for adapted templates */
743  if (Templates == AdaptedTemplates)
744  BaselineCutoffs[ClassId] = CharNormCutoffs[ClassId];
745 
746  IClass = ClassForClassId (Templates->Templates, ClassId);
747 
748  for (Fid = 0; Fid < Features->NumFeatures; Fid++) {
749  Pid = AddIntProto (IClass);
750  assert (Pid != NO_PROTO);
751 
752  Feature = Features->Features[Fid];
753  TempProto = NewTempProto ();
754  Proto = &(TempProto->Proto);
755 
756  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
757  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
758  instead of the -0.25 to 0.75 used in baseline normalization */
759  Proto->Angle = Feature->Params[OutlineFeatDir];
760  Proto->X = Feature->Params[OutlineFeatX];
761  Proto->Y = Feature->Params[OutlineFeatY] - Y_DIM_OFFSET;
762  Proto->Length = Feature->Params[OutlineFeatLength];
763  FillABC(Proto);
764 
765  TempProto->ProtoId = Pid;
766  SET_BIT (Config->Protos, Pid);
767 
768  ConvertProto(Proto, Pid, IClass);
769  AddProtoToProtoPruner(Proto, Pid, IClass,
771 
772  Class->TempProtos = push (Class->TempProtos, TempProto);
773  }
774  FreeFeatureSet(Features);
775 
776  AddIntConfig(IClass);
777  ConvertConfig (AllProtosOn, 0, IClass);
778 
780  tprintf("Added new class '%s' with class id %d and %d protos.\n",
781  unicharset.id_to_unichar(ClassId), ClassId, NumFeatures);
783  DisplayAdaptedChar(Blob, IClass);
784  }
785 
786  if (IsEmptyAdaptedClass(Class))
787  (Templates->NumNonEmptyClasses)++;
788 } /* InitAdaptedClass */
789 
790 
791 /*---------------------------------------------------------------------------*/
813  INT_FEATURE_ARRAY IntFeatures,
814  FEATURE_SET *FloatFeatures) {
815  FEATURE_SET Features;
816  int NumFeatures;
817 
818  classify_norm_method.set_value(baseline);
819  Features = ExtractPicoFeatures(Blob);
820 
821  NumFeatures = Features->NumFeatures;
822  if (NumFeatures == 0 || NumFeatures > UNLIKELY_NUM_FEAT) {
823  FreeFeatureSet(Features);
824  return 0;
825  }
826 
827  ComputeIntFeatures(Features, IntFeatures);
828  *FloatFeatures = Features;
829 
830  return NumFeatures;
831 } /* GetAdaptiveFeatures */
832 
833 
834 /*-----------------------------------------------------------------------------
835  Private Code
836 -----------------------------------------------------------------------------*/
837 /*---------------------------------------------------------------------------*/
851  if (word->best_choice == NULL) return false;
852  int BestChoiceLength = word->best_choice->length();
853  float adaptable_score =
855  return // rules that apply in general - simplest to compute first
856  BestChoiceLength > 0 &&
857  BestChoiceLength == word->rebuild_word->NumBlobs() &&
858  BestChoiceLength <= MAX_ADAPTABLE_WERD_SIZE &&
859  // This basically ensures that the word is at least a dictionary match
860  // (freq word, user word, system dawg word, etc).
861  // Since all the other adjustments will make adjust factor higher
862  // than higher than adaptable_score=1.1+0.05=1.15
863  // Since these are other flags that ensure that the word is dict word,
864  // this check could be at times redundant.
865  word->best_choice->adjust_factor() <= adaptable_score &&
866  // Make sure that alternative choices are not dictionary words.
867  word->AlternativeChoiceAdjustmentsWorseThan(adaptable_score);
868 }
869 
870 /*---------------------------------------------------------------------------*/
// Adapts adaptive_templates to Blob as an example of class ClassId in font
// FontinfoId.
// An empty adapted class is initialised from the blob. Otherwise the blob is
// matched against the class's configs that carry the same font id and then,
// depending on how the match rating compares with Threshold, either an
// existing temporary config is reinforced or a new temporary config is made.
// A temporary config that has become reliable is made permanent.
// NOTE(review): source lines 925-926, 933, 945, 954, 957 and 970 are absent
// from this listing, so some arguments and debug conditions are not visible.
886 void Classify::AdaptToChar(TBLOB* Blob, CLASS_ID ClassId, int FontinfoId,
887  FLOAT32 Threshold,
888  ADAPT_TEMPLATES adaptive_templates) {
889  int NumFeatures;
890  INT_FEATURE_ARRAY IntFeatures;
891  UnicharRating int_result;
892  INT_CLASS IClass;
893  ADAPT_CLASS Class;
894  TEMP_CONFIG TempConfig;
895  FEATURE_SET FloatFeatures;
896  int NewTempConfigId;
897 
 // Illegal class ids are ignored.
898  if (!LegalClassId (ClassId))
899  return;
900 
901  int_result.unichar_id = ClassId;
902  Class = adaptive_templates->Class[ClassId];
903  assert(Class != NULL);
904  if (IsEmptyAdaptedClass(Class)) {
 // First example of this class: build the class from the blob.
905  InitAdaptedClass(Blob, ClassId, FontinfoId, Class, adaptive_templates);
906  } else {
907  IClass = ClassForClassId(adaptive_templates->Templates, ClassId);
908 
909  NumFeatures = GetAdaptiveFeatures(Blob, IntFeatures, &FloatFeatures);
910  if (NumFeatures <= 0) {
911  return; // Features already freed by GetAdaptiveFeatures.
912  }
913 
914  // Only match configs with the matching font.
 // NOTE(review): the vector is sized with MAX_NUM_PROTOS although it is
 // indexed by config id - confirm that this is intended.
915  BIT_VECTOR MatchingFontConfigs = NewBitVector(MAX_NUM_PROTOS);
916  for (int cfg = 0; cfg < IClass->NumConfigs; ++cfg) {
917  if (GetFontinfoId(Class, cfg) == FontinfoId) {
918  SET_BIT(MatchingFontConfigs, cfg);
919  } else {
920  reset_bit(MatchingFontConfigs, cfg);
921  }
922  }
923  im_.Match(IClass, AllProtosOn, MatchingFontConfigs,
924  NumFeatures, IntFeatures,
927  FreeBitVector(MatchingFontConfigs);
928 
929  SetAdaptiveThreshold(Threshold);
930 
 // Good match: the distance (1 - rating) is within Threshold.
931  if (1.0f - int_result.rating <= Threshold) {
932  if (ConfigIsPermanent(Class, int_result.config)) {
934  tprintf("Found good match to perm config %d = %4.1f%%.\n",
935  int_result.config, int_result.rating * 100.0);
 // Matched a permanent config: release the features and leave.
936  FreeFeatureSet(FloatFeatures);
937  return;
938  }
939 
 // Matched a temporary config: increase its confidence and keep the class's
 // MaxNumTimesSeen up to date.
940  TempConfig = TempConfigFor(Class, int_result.config);
941  IncreaseConfidence(TempConfig);
942  if (TempConfig->NumTimesSeen > Class->MaxNumTimesSeen) {
943  Class->MaxNumTimesSeen = TempConfig->NumTimesSeen;
944  }
946  tprintf("Increasing reliability of temp config %d to %d.\n",
947  int_result.config, TempConfig->NumTimesSeen);
948 
949  if (TempConfigReliable(ClassId, TempConfig)) {
950  MakePermanent(adaptive_templates, ClassId, int_result.config, Blob);
951  UpdateAmbigsGroup(ClassId, Blob);
952  }
953  } else {
 // Poor match: create a new temporary config from this blob's features.
955  tprintf("Found poor match to temp config %d = %4.1f%%.\n",
956  int_result.config, int_result.rating * 100.0);
958  DisplayAdaptedChar(Blob, IClass);
959  }
960  NewTempConfigId =
961  MakeNewTemporaryConfig(adaptive_templates, ClassId, FontinfoId,
962  NumFeatures, IntFeatures, FloatFeatures);
 // A negative id is treated as "no config created" and skips the promotion.
963  if (NewTempConfigId >= 0 &&
964  TempConfigReliable(ClassId, TempConfigFor(Class, NewTempConfigId))) {
965  MakePermanent(adaptive_templates, ClassId, NewTempConfigId, Blob);
966  UpdateAmbigsGroup(ClassId, Blob);
967  }
968 
969 #ifndef GRAPHICS_DISABLED
971  DisplayAdaptedChar(Blob, IClass);
972  }
973 #endif
974  }
 // Reached on every path of the non-empty-class branch that did not return
 // early above.
975  FreeFeatureSet(FloatFeatures);
976  }
977 } /* AdaptToChar */
978 
980 #ifndef GRAPHICS_DISABLED
981  INT_FX_RESULT_STRUCT fx_info;
985  &bl_features);
986  if (sample == NULL) return;
987 
988  UnicharRating int_result;
989  im_.Match(int_class, AllProtosOn, AllConfigsOn,
990  bl_features.size(), &bl_features[0],
993  tprintf("Best match to temp config %d = %4.1f%%.\n",
994  int_result.config, int_result.rating * 100.0);
996  uinT32 ConfigMask;
997  ConfigMask = 1 << int_result.config;
999  im_.Match(int_class, AllProtosOn, (BIT_VECTOR)&ConfigMask,
1000  bl_features.size(), &bl_features[0],
1001  &int_result, classify_adapt_feature_threshold,
1002  6 | 0x19, matcher_debug_separate_windows);
1004  }
1005 
1006  delete sample;
1007 #endif
1008 }
1009 
1030 void Classify::AddNewResult(const UnicharRating& new_result,
1031  ADAPT_RESULTS *results) {
1032  int old_match = FindScoredUnichar(new_result.unichar_id, *results);
1033 
1034  if (new_result.rating + matcher_bad_match_pad < results->best_rating ||
1035  (old_match < results->match.size() &&
1036  new_result.rating <= results->match[old_match].rating))
1037  return; // New one not good enough.
1038 
1039  if (!unicharset.get_fragment(new_result.unichar_id))
1040  results->HasNonfragment = true;
1041 
1042  if (old_match < results->match.size()) {
1043  results->match[old_match].rating = new_result.rating;
1044  } else {
1045  results->match.push_back(new_result);
1046  }
1047 
1048  if (new_result.rating > results->best_rating &&
1049  // Ensure that fragments do not affect best rating, class and config.
1050  // This is needed so that at least one non-fragmented character is
1051  // always present in the results.
1052  // TODO(daria): verify that this helps accuracy and does not
1053  // hurt performance.
1054  !unicharset.get_fragment(new_result.unichar_id)) {
1055  results->best_match_index = old_match;
1056  results->best_rating = new_result.rating;
1057  results->best_unichar_id = new_result.unichar_id;
1058  }
1059 } /* AddNewResult */
1060 
1061 
1062 /*---------------------------------------------------------------------------*/
1085  const GenericVector<INT_FEATURE_STRUCT>& int_features,
1086  const INT_FX_RESULT_STRUCT& fx_info,
1087  const TBLOB *blob,
1088  INT_TEMPLATES templates,
1089  ADAPT_CLASS *classes,
1090  UNICHAR_ID *ambiguities,
1091  ADAPT_RESULTS *results) {
1092  if (int_features.empty()) return;
1093  uinT8* CharNormArray = new uinT8[unicharset.size()];
1094  UnicharRating int_result;
1095 
1096  results->BlobLength = GetCharNormFeature(fx_info, templates, NULL,
1097  CharNormArray);
1098  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1099  if (debug)
1100  tprintf("AM Matches = ");
1101 
1102  int top = blob->bounding_box().top();
1103  int bottom = blob->bounding_box().bottom();
1104  while (*ambiguities >= 0) {
1105  CLASS_ID class_id = *ambiguities;
1106 
1107  int_result.unichar_id = class_id;
1108  im_.Match(ClassForClassId(templates, class_id),
1110  int_features.size(), &int_features[0],
1111  &int_result,
1114 
1115  ExpandShapesAndApplyCorrections(NULL, debug, class_id, bottom, top, 0,
1116  results->BlobLength,
1118  CharNormArray, &int_result, results);
1119  ambiguities++;
1120  }
1121  delete [] CharNormArray;
1122 } /* AmbigClassifier */
1123 
1124 /*---------------------------------------------------------------------------*/
1128  inT16 num_features,
1129  const INT_FEATURE_STRUCT* features,
1130  const uinT8* norm_factors,
1131  ADAPT_CLASS* classes,
1132  int debug,
1133  int matcher_multiplier,
1134  const TBOX& blob_box,
1135  const GenericVector<CP_RESULT_STRUCT>& results,
1136  ADAPT_RESULTS* final_results) {
1137  int top = blob_box.top();
1138  int bottom = blob_box.bottom();
1139  UnicharRating int_result;
1140  for (int c = 0; c < results.size(); c++) {
1141  CLASS_ID class_id = results[c].Class;
1142  BIT_VECTOR protos = classes != NULL ? classes[class_id]->PermProtos
1143  : AllProtosOn;
1144  BIT_VECTOR configs = classes != NULL ? classes[class_id]->PermConfigs
1145  : AllConfigsOn;
1146 
1147  int_result.unichar_id = class_id;
1148  im_.Match(ClassForClassId(templates, class_id),
1149  protos, configs,
1150  num_features, features,
1151  &int_result, classify_adapt_feature_threshold, debug,
1153  bool debug = matcher_debug_level >= 2 || classify_debug_level > 1;
1154  ExpandShapesAndApplyCorrections(classes, debug, class_id, bottom, top,
1155  results[c].Rating,
1156  final_results->BlobLength,
1157  matcher_multiplier, norm_factors,
1158  &int_result, final_results);
1159  }
1160 }
1161 
1162 // Converts configs to fonts, and if the result is not adapted, and a
1163 // shape_table_ is present, the shape is expanded to include all
1164 // unichar_ids represented, before applying a set of corrections to the
1165 // distance rating in int_result, (see ComputeCorrectedRating.)
1166 // The results are added to the final_results output.
// A non-NULL classes array selects the adapted path; NULL selects the
// pre-trained path. Unichars for which unicharset.get_enabled() is false are
// never added to final_results.
// NOTE(review): the embedded line numbers skip 1167 and 1184, so this listing
// appears to have lost the line naming the function and one line of the
// pre-trained font-mapping call below.
1168  ADAPT_CLASS* classes, bool debug, int class_id, int bottom, int top,
1169  float cp_rating, int blob_length, int matcher_multiplier,
1170  const uinT8* cn_factors,
1171  UnicharRating* int_result, ADAPT_RESULTS* final_results) {
1172  if (classes != NULL) {
1173  // Adapted result. Convert configs to fontinfo_ids.
1174  int_result->adapted = true;
1175  for (int f = 0; f < int_result->fonts.size(); ++f) {
1176  int_result->fonts[f].fontinfo_id =
1177  GetFontinfoId(classes[class_id], int_result->fonts[f].fontinfo_id);
1178  }
1179  } else {
1180  // Pre-trained result. Map fonts using font_sets_.
1181  int_result->adapted = false;
1182  for (int f = 0; f < int_result->fonts.size(); ++f) {
1183  int_result->fonts[f].fontinfo_id =
// NOTE(review): the callee of this assignment is on the dropped line 1184.
1185  int_result->fonts[f].fontinfo_id);
1186  }
1187  if (shape_table_ != NULL) {
1188  // Two possible cases:
1189  // 1. Flat shapetable. All unichar-ids of the shapes referenced by
1190  // int_result->fonts are the same. In this case build a new vector of
1191  // mapped fonts and replace the fonts in int_result.
1192  // 2. Multi-unichar shapetable. Variable unichars in the shapes referenced
1193  // by int_result. In this case, build a vector of UnicharRating to
1194  // gather together different font-ids for each unichar. Also covers case1.
1195  GenericVector<UnicharRating> mapped_results;
1196  for (int f = 0; f < int_result->fonts.size(); ++f) {
// After the mapping loop above, fontinfo_id is used as a shape_table_ index.
1197  int shape_id = int_result->fonts[f].fontinfo_id;
1198  const Shape& shape = shape_table_->GetShape(shape_id);
1199  for (int c = 0; c < shape.size(); ++c) {
1200  int unichar_id = shape[c].unichar_id;
1201  if (!unicharset.get_enabled(unichar_id)) continue;
1202  // Find the mapped_result for unichar_id.
// Linear search; r == mapped_results.size() afterwards means "not found".
1203  int r = 0;
1204  for (r = 0; r < mapped_results.size() &&
1205  mapped_results[r].unichar_id != unichar_id; ++r) {}
1206  if (r == mapped_results.size()) {
// Start the new entry as a copy of int_result, then override the unichar
// and empty the font list so only fonts from the shapes get collected.
1207  mapped_results.push_back(*int_result);
1208  mapped_results[r].unichar_id = unichar_id;
1209  mapped_results[r].fonts.truncate(0);
1210  }
// Every font of this shape entry inherits the score of the source font f.
1211  for (int i = 0; i < shape[c].font_ids.size(); ++i) {
1212  mapped_results[r].fonts.push_back(
1213  ScoredFont(shape[c].font_ids[i], int_result->fonts[f].score));
1214  }
1215  }
1216  }
// Each expanded unichar gets its own corrected rating, all computed from the
// same uncorrected int_result->rating and feature_misses.
1217  for (int m = 0; m < mapped_results.size(); ++m) {
1218  mapped_results[m].rating =
1219  ComputeCorrectedRating(debug, mapped_results[m].unichar_id,
1220  cp_rating, int_result->rating,
1221  int_result->feature_misses, bottom, top,
1222  blob_length, matcher_multiplier, cn_factors);
1223  AddNewResult(mapped_results[m], final_results);
1224  }
// The shape-table path has emitted all of its results; skip the single-result
// code below.
1225  return;
1226  }
1227  }
// Reached for adapted results and for pre-trained results without a
// shape_table_: class_id is used directly as the unichar id.
1228  if (unicharset.get_enabled(class_id)) {
1229  int_result->rating = ComputeCorrectedRating(debug, class_id, cp_rating,
1230  int_result->rating,
1231  int_result->feature_misses,
1232  bottom, top, blob_length,
1233  matcher_multiplier, cn_factors);
1234  AddNewResult(*int_result, final_results);
1235  }
1236 }
1237 
1238 // Applies a set of corrections to the confidence im_rating,
1239 // including the cn_correction, miss penalty and additional penalty
1240 // for non-alnums being vertical misfits. Returns the corrected confidence.
1241 double Classify::ComputeCorrectedRating(bool debug, int unichar_id,
1242  double cp_rating, double im_rating,
1243  int feature_misses,
1244  int bottom, int top,
1245  int blob_length, int matcher_multiplier,
1246  const uinT8* cn_factors) {
1247  // Compute class feature corrections.
1248  double cn_corrected = im_.ApplyCNCorrection(1.0 - im_rating, blob_length,
1249  cn_factors[unichar_id],
1250  matcher_multiplier);
1251  double miss_penalty = tessedit_class_miss_scale * feature_misses;
1252  double vertical_penalty = 0.0;
1253  // Penalize non-alnums for being vertical misfits.
1254  if (!unicharset.get_isalpha(unichar_id) &&
1255  !unicharset.get_isdigit(unichar_id) &&
1256  cn_factors[unichar_id] != 0 && classify_misfit_junk_penalty > 0.0) {
1257  int min_bottom, max_bottom, min_top, max_top;
1258  unicharset.get_top_bottom(unichar_id, &min_bottom, &max_bottom,
1259  &min_top, &max_top);
1260  if (debug) {
1261  tprintf("top=%d, vs [%d, %d], bottom=%d, vs [%d, %d]\n",
1262  top, min_top, max_top, bottom, min_bottom, max_bottom);
1263  }
1264  if (top < min_top || top > max_top ||
1265  bottom < min_bottom || bottom > max_bottom) {
1266  vertical_penalty = classify_misfit_junk_penalty;
1267  }
1268  }
1269  double result = 1.0 - (cn_corrected + miss_penalty + vertical_penalty);
1270  if (result < WORST_POSSIBLE_RATING)
1271  result = WORST_POSSIBLE_RATING;
1272  if (debug) {
1273  tprintf("%s: %2.1f%%(CP%2.1f, IM%2.1f + CN%.2f(%d) + MP%2.1f + VP%2.1f)\n",
1274  unicharset.id_to_unichar(unichar_id),
1275  result * 100.0,
1276  cp_rating * 100.0,
1277  (1.0 - im_rating) * 100.0,
1278  (cn_corrected - (1.0 - im_rating)) * 100.0,
1279  cn_factors[unichar_id],
1280  miss_penalty * 100.0,
1281  vertical_penalty * 100.0);
1282  }
1283  return result;
1284 }
1285 
1286 /*---------------------------------------------------------------------------*/
// Prunes and matches int_features against the given adaptive Templates and
// returns the Ambigs list stored on the best-matching config.
// Returns NULL when int_features is empty, or when Results ends up with no
// valid best_unichar_id / best_match_index.
// NOTE(review): the embedded line numbers skip 1287-1306 and 1314, so the
// original header comment, the line naming the function and one interior line
// are missing from this listing.
1307  TBLOB *Blob, const GenericVector<INT_FEATURE_STRUCT>& int_features,
1308  const INT_FX_RESULT_STRUCT& fx_info,
1309  ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results) {
1310  if (int_features.empty()) return NULL;
// Scratch normalization array with one entry per unichar, released below
// after matching.
1311  uinT8* CharNormArray = new uinT8[unicharset.size()];
1312  ClearCharNormArray(CharNormArray);
1313 
1315  PruneClasses(Templates->Templates, int_features.size(), -1, &int_features[0],
1316  CharNormArray, BaselineCutoffs, &Results->CPResults);
1317 
1318  if (matcher_debug_level >= 2 || classify_debug_level > 1)
1319  tprintf("BL Matches = ");
1320 
1321  MasterMatcher(Templates->Templates, int_features.size(), &int_features[0],
1322  CharNormArray,
1323  Templates->Class, matcher_debug_flags, 0,
1324  Blob->bounding_box(), Results->CPResults, Results);
1325 
1326  delete [] CharNormArray;
1327  CLASS_ID ClassId = Results->best_unichar_id;
1328  if (ClassId == INVALID_UNICHAR_ID || Results->best_match_index < 0)
1329  return NULL;
1330 
// NOTE(review): reads the .Perm member of the best config unconditionally;
// presumably the best match is expected to be a permanent config - confirm.
1331  return Templates->Class[ClassId]->
1332  Config[Results->match[Results->best_match_index].config].Perm->Ambigs;
1333 } /* BaselineClassifier */
1334 
1335 
1336 /*---------------------------------------------------------------------------*/
// Classifies sample with static_classifier_ and adds every returned
// UnicharRating to adapt_results through AddNewResult. Also sets
// adapt_results->BlobLength from the sample's outline length.
// Returns sample.num_features().
// NOTE(review): the embedded line numbers skip 1337-1355, so the original
// header comment and the line naming the function are missing here.
1356  const TrainingSample& sample,
1357  ADAPT_RESULTS *adapt_results) {
1358  // This is the length that is used for scaling ratings vs certainty.
1359  adapt_results->BlobLength =
1360  IntCastRounded(sample.outline_length() / kStandardFeatureLength);
1361  GenericVector<UnicharRating> unichar_results;
1362  static_classifier_->UnicharClassifySample(sample, blob->denorm().pix(), 0,
1363  -1, &unichar_results);
1364  // Convert results to the format used internally by AdaptiveClassifier.
1365  for (int r = 0; r < unichar_results.size(); ++r) {
1366  AddNewResult(unichar_results[r], adapt_results);
1367  }
1368  return sample.num_features();
1369 } /* CharNormClassifier */
1370 
1371 // As CharNormClassifier, but operates on a TrainingSample and outputs to
1372 // a GenericVector of UnicharRating without conversion to classes.
// results is cleared first. When pruner_only is set the output holds one
// entry per class-pruner result; otherwise it holds the MasterMatcher matches.
// A keep_this >= 0 forces the pruner output down to that single class.
// Returns the number of features in the sample.
// NOTE(review): the embedded line numbers skip 1373, 1390, 1417 and 1423, so
// the line naming the function (which also declares pruner_only) and three
// interior lines are missing from this listing.
1374  int keep_this,
1375  const TrainingSample& sample,
1376  GenericVector<UnicharRating>* results) {
1377  results->clear();
1378  ADAPT_RESULTS* adapt_results = new ADAPT_RESULTS();
1379  adapt_results->Initialize();
1380  // Compute the bounding box of the features.
1381  int num_features = sample.num_features();
1382  // Only the top and bottom of the blob_box are used by MasterMatcher, so
1383  // fabricate right and left using top and bottom.
1384  TBOX blob_box(sample.geo_feature(GeoBottom), sample.geo_feature(GeoBottom),
1385  sample.geo_feature(GeoTop), sample.geo_feature(GeoTop));
1386  // Compute the char_norm_array from the saved cn_feature.
1387  FEATURE norm_feature = sample.GetCNFeature();
1388  uinT8* char_norm_array = new uinT8[unicharset.size()];
// NOTE(review): the second operand of MAX is on the dropped line 1390.
1389  int num_pruner_classes = MAX(unicharset.size(),
1391  uinT8* pruner_norm_array = new uinT8[num_pruner_classes];
1392  adapt_results->BlobLength =
1393  static_cast<int>(ActualOutlineLength(norm_feature) * 20 + 0.5);
1394  ComputeCharNormArrays(norm_feature, PreTrainedTemplates, char_norm_array,
1395  pruner_norm_array);
1396 
// The pruner cutoffs depend on whether a shape table is loaded.
1397  PruneClasses(PreTrainedTemplates, num_features, keep_this, sample.features(),
1398  pruner_norm_array,
1399  shape_table_ != NULL ? &shapetable_cutoffs_[0] : CharNormCutoffs,
1400  &adapt_results->CPResults);
1401  delete [] pruner_norm_array;
1402  if (keep_this >= 0) {
// NOTE(review): writes CPResults[0] without checking that the pruner returned
// at least one result - confirm PruneClasses guarantees that for keep_this.
1403  adapt_results->CPResults[0].Class = keep_this;
1404  adapt_results->CPResults.truncate(1);
1405  }
1406  if (pruner_only) {
1407  // Convert pruner results to output format.
1408  for (int i = 0; i < adapt_results->CPResults.size(); ++i) {
1409  int class_id = adapt_results->CPResults[i].Class;
1410  results->push_back(
1411  UnicharRating(class_id, 1.0f - adapt_results->CPResults[i].Rating));
1412  }
1413  } else {
// NULL is passed in the classes slot, i.e. no adapted classes are supplied.
1414  MasterMatcher(PreTrainedTemplates, num_features, sample.features(),
1415  char_norm_array,
1416  NULL, matcher_debug_flags,
1418  blob_box, adapt_results->CPResults, adapt_results);
1419  // Convert master matcher results to output format.
1420  for (int i = 0; i < adapt_results->match.size(); i++) {
1421  results->push_back(adapt_results->match[i]);
1422  }
1424  }
1425  delete [] char_norm_array;
1426  delete adapt_results;
1427  return num_features;
1428 } /* CharNormTrainingSample */
1429 
1430 
1431 /*---------------------------------------------------------------------------*/
// Adds a UNICHAR_SPACE result to results. With
// x = BlobLength / matcher_avg_noise_size, the value passed to UnicharRating
// is 1 - x^2 / (1 + x^2).
// NOTE(review): the embedded line numbers skip 1432-1446, so the original
// header comment and the signature are missing from this listing.
1447  float rating = results->BlobLength / matcher_avg_noise_size;
1448  rating *= rating;
1449  rating /= 1.0 + rating;
1450 
1451  AddNewResult(UnicharRating(UNICHAR_SPACE, 1.0f - rating), results);
1452 } /* ClassifyAsNoise */
1453 
// Converts the entries of Results->match into BLOB_CHOICEs appended to
// Choices, stopping once max_matches choices have been produced.
// For each accepted match:
//   Rating    = (1 - result.rating) * rating_scale * Results->BlobLength
//   Certainty = (1 - result.rating) * -certainty_scale
// unless Results->BlobLength is 0, in which case fixed poor values are used.
// Finally truncates Results->match to the number of choices created.
// Choices must not be NULL.
// NOTE(review): the embedded line numbers skip 1521 and 1524, so two lines of
// the BLOB_CHOICE constructor call below are missing from this listing.
1460 void Classify::ConvertMatchesToChoices(const DENORM& denorm, const TBOX& box,
1461  ADAPT_RESULTS *Results,
1462  BLOB_CHOICE_LIST *Choices) {
1463  assert(Choices != NULL);
1464  FLOAT32 Rating;
1465  FLOAT32 Certainty;
1466  BLOB_CHOICE_IT temp_it;
1467  bool contains_nonfrag = false;
1468  temp_it.set_to_list(Choices);
1469  int choices_length = 0;
1470  // With no shape_table_ maintain the previous MAX_MATCHES as the maximum
1471  // number of returned results, but with a shape_table_ we want to have room
1472  // for at least the biggest shape (which might contain hundreds of Indic
1473  // grapheme fragments) and more, so use double the size of the biggest shape
1474  // if that is more than the default.
1475  int max_matches = MAX_MATCHES;
1476  if (shape_table_ != NULL) {
1477  max_matches = shape_table_->MaxNumUnichars() * 2;
1478  if (max_matches < MAX_MATCHES)
1479  max_matches = MAX_MATCHES;
1480  }
1481 
1482  float best_certainty = -MAX_FLOAT32;
1483  for (int i = 0; i < Results->match.size(); i++) {
1484  const UnicharRating& result = Results->match[i];
1485  bool adapted = result.adapted;
1486  bool current_is_frag = (unicharset.get_fragment(result.unichar_id) != NULL);
1487  if (temp_it.length()+1 == max_matches &&
1488  !contains_nonfrag && current_is_frag) {
1489  continue; // look for a non-fragmented character to fill the
1490  // last spot in Choices if only fragments are present
1491  }
1492  // BlobLength can never be legally 0, this means recognition failed.
1493  // But we must return a classification result because some invoking
1494  // functions (chopper/permuter) do not anticipate a null blob choice.
1495  // So we need to assign a poor, but not infinitely bad score.
1496  if (Results->BlobLength == 0) {
1497  Certainty = -20;
1498  Rating = 100; // should be -certainty * real_blob_length
1499  } else {
1500  Rating = Certainty = (1.0f - result.rating);
1501  Rating *= rating_scale * Results->BlobLength;
1502  Certainty *= -(getDict().certainty_scale);
1503  }
1504  // Adapted results, by their very nature, should have good certainty.
1505  // Those that don't are at best misleading, and often lead to errors,
1506  // so don't accept adapted results that are too far behind the best result,
1507  // whether adapted or static.
1508  // TODO(rays) find some way of automatically tuning these constants.
// best_certainty is capped at classify_adapted_pruning_threshold whenever it
// is raised, so the comparison below never uses a value above that cap.
1509  if (Certainty > best_certainty) {
1510  best_certainty = MIN(Certainty, classify_adapted_pruning_threshold);
1511  } else if (adapted &&
1512  Certainty / classify_adapted_pruning_factor < best_certainty) {
1513  continue; // Don't accept bad adapted results.
1514  }
1515 
1516  float min_xheight, max_xheight, yshift;
1517  denorm.XHeightRange(result.unichar_id, unicharset, box,
1518  &min_xheight, &max_xheight, &yshift);
1519  BLOB_CHOICE* choice =
1520  new BLOB_CHOICE(result.unichar_id, Rating, Certainty,
1522  min_xheight, max_xheight, yshift,
1523  adapted ? BCC_ADAPTED_CLASSIFIER
1525  choice->set_fonts(result.fonts);
1526  temp_it.add_to_end(choice);
1527  contains_nonfrag |= !current_is_frag; // update contains_nonfrag
1528  choices_length++;
1529  if (choices_length >= max_matches) break;
1530  }
// NOTE(review): this keeps the first choices_length entries of match; entries
// skipped with `continue` above are not compacted out first, so the kept
// prefix need not be exactly the matches that became choices - confirm.
1531  Results->match.truncate(choices_length);
1532 } // ConvertMatchesToChoices
1533 
1534 
1535 /*---------------------------------------------------------------------------*/
1536 #ifndef GRAPHICS_DISABLED
1537 
// Converts blob to a training sample and hands it, together with
// Results->best_unichar_id, to static_classifier_->DebugDisplay.
// Does nothing when static_classifier_ is NULL or no sample can be built.
// NOTE(review): the embedded line numbers skip 1538-1547 and 1551-1552, so the
// original header comment, the line naming the function and the declarations
// of bl_features and sample are missing from this listing.
1548  ADAPT_RESULTS *Results) {
1549  if (static_classifier_ == NULL) return;
1550  INT_FX_RESULT_STRUCT fx_info;
1553  BlobToTrainingSample(*blob, false, &fx_info, &bl_features);
1554  if (sample == NULL) return;
1555  static_classifier_->DebugDisplay(*sample, blob->denorm().pix(),
1556  Results->best_unichar_id);
// NOTE(review): other code in this file deletes the sample it obtains this
// way; no `delete sample` is visible here - possible leak, confirm ownership.
1557 } /* DebugAdaptiveClassifier */
1558 #endif
1559 
1560 /*---------------------------------------------------------------------------*/
// Classifies Blob into Results, choosing between the char-norm classifier,
// the baseline classifier and the ambiguity classifier:
//   * the first branch (its full condition is partly on a dropped line) runs
//     only CharNormClassifier;
//   * otherwise BaselineClassifier runs first, followed by CharNormClassifier
//     when the baseline result is empty or marginal, or by AmbigClassifier
//     when the baseline match carries a non-empty ambiguity list.
// If Results has no non-fragment match or no match at all, the blob is
// classified as noise. The sample created here is deleted before returning.
// NOTE(review): the embedded line numbers skip 1561-1583, 1587-1589, 1593,
// 1601 and 1607-1608, so the header comment, the signature, the declarations
// of bl_features and sample, and parts of three expressions are missing.
1584  UNICHAR_ID *Ambiguities;
1585 
1586  INT_FX_RESULT_STRUCT fx_info;
1590  &bl_features);
1591  if (sample == NULL) return;
1592 
1594  tess_cn_matching) {
1595  CharNormClassifier(Blob, *sample, Results);
1596  } else {
1597  Ambiguities = BaselineClassifier(Blob, bl_features, fx_info,
1598  AdaptedTemplates, Results);
1599  if ((!Results->match.empty() &&
1600  MarginalMatch(Results->best_rating,
1602  !tess_bn_matching) ||
1603  Results->match.empty()) {
1604  CharNormClassifier(Blob, *sample, Results);
// A list whose first entry is negative is treated as empty.
1605  } else if (Ambiguities && *Ambiguities >= 0 && !tess_bn_matching) {
1606  AmbigClassifier(bl_features, fx_info, Blob,
1609  Ambiguities,
1610  Results);
1611  }
1612  }
1613 
1614  // Force the blob to be classified as noise
1615  // if the results contain only fragments.
1616  // TODO(daria): verify that this is better than
1617  // just adding a NULL classification.
1618  if (!Results->HasNonfragment || Results->match.empty())
1619  ClassifyAsNoise(Results);
1620  delete sample;
1621 } /* DoAdaptiveMatch */
1622 
1623 /*---------------------------------------------------------------------------*/
// Runs the char-norm classifier on Blob, removes bad matches, and returns a
// newly allocated array of the surviving unichar ids terminated by -1.
// The array holds only the terminator when nothing matched, or when the single
// match is CorrectClass. Returns NULL when no training sample can be built.
// The returned array is allocated with new[] here and not freed in this
// function.
// NOTE(review): the embedded line numbers skip 1624-1640, 1648-1650 and 1660,
// so the header comment, the line naming the function, the declarations of
// bl_features and sample, and one statement after RemoveBadMatches are
// missing from this listing.
1641  CLASS_ID CorrectClass) {
1642  ADAPT_RESULTS *Results = new ADAPT_RESULTS();
1643  UNICHAR_ID *Ambiguities;
1644  int i;
1645 
1646  Results->Initialize();
1647  INT_FX_RESULT_STRUCT fx_info;
1651  &bl_features);
1652  if (sample == NULL) {
1653  delete Results;
1654  return NULL;
1655  }
1656 
1657  CharNormClassifier(Blob, *sample, Results);
1658  delete sample;
1659  RemoveBadMatches(Results);
1661 
1662  /* copy the class id's into an string of ambiguities - don't copy if
1663  the correct class is the only class id matched */
// One extra slot is reserved for the -1 terminator.
1664  Ambiguities = new UNICHAR_ID[Results->match.size() + 1];
1665  if (Results->match.size() > 1 ||
1666  (Results->match.size() == 1 &&
1667  Results->match[0].unichar_id != CorrectClass)) {
1668  for (i = 0; i < Results->match.size(); i++)
1669  Ambiguities[i] = Results->match[i].unichar_id;
1670  Ambiguities[i] = -1;
1671  } else {
1672  Ambiguities[0] = -1;
1673  }
1674 
1675  delete Results;
1676  return Ambiguities;
1677 } /* GetAmbiguities */
1678 
1679 // Returns true if the given blob looks too dissimilar to any character
1680 // present in the classifier templates.
// The decision is taken on the first rating that is not a character fragment:
// its certainty is compared against a threshold. If every rating is a
// fragment, the blob is reported as garbage.
// NOTE(review): the embedded line numbers skip 1681, 1685-1686 and 1698, so
// the signature, the condition guarding the debug print and the right-hand
// side of the final comparison are missing from this listing.
1682  BLOB_CHOICE_LIST *ratings = new BLOB_CHOICE_LIST();
1683  AdaptiveClassifier(blob, ratings);
1684  BLOB_CHOICE_IT ratings_it(ratings);
1687  print_ratings_list("======================\nLooksLikeGarbage() got ",
1688  ratings, unicharset);
1689  }
1690  for (ratings_it.mark_cycle_pt(); !ratings_it.cycled_list();
1691  ratings_it.forward()) {
1692  if (unicharset.get_fragment(ratings_it.data()->unichar_id()) != NULL) {
1693  continue;
1694  }
// Copy the certainty out before the list that owns it is deleted.
1695  float certainty = ratings_it.data()->certainty();
1696  delete ratings;
1697  return certainty <
1699  }
1700  delete ratings;
1701  return true; // no whole characters in ratings
1702 }
1703 
1704 /*---------------------------------------------------------------------------*/
// Builds a char-norm feature from fx_info (Ymean, Length, Rx, Ry), uses it to
// fill char_norm_array and pruner_norm_array via ComputeCharNormArrays, and
// returns fx_info.Length / kStandardFeatureLength rounded to an int.
// Note that this function's parameters list the pruner array before the
// char-norm array, whereas the ComputeCharNormArrays call below passes the
// char-norm array first.
// NOTE(review): the embedded line numbers skip 1705-1728, so the original
// header comment and the line naming the function are missing here.
1729  INT_TEMPLATES templates,
1730  uinT8* pruner_norm_array,
1731  uinT8* char_norm_array) {
1732  FEATURE norm_feature = NewFeature(&CharNormDesc);
1733  float baseline = kBlnBaselineOffset;
1734  float scale = MF_SCALE_FACTOR;
1735  norm_feature->Params[CharNormY] = (fx_info.Ymean - baseline) * scale;
1736  norm_feature->Params[CharNormLength] =
1737  fx_info.Length * scale / LENGTH_COMPRESSION;
1738  norm_feature->Params[CharNormRx] = fx_info.Rx * scale;
1739  norm_feature->Params[CharNormRy] = fx_info.Ry * scale;
1740  // Deletes norm_feature.
1741  ComputeCharNormArrays(norm_feature, templates, char_norm_array,
1742  pruner_norm_array);
1743  return IntCastRounded(fx_info.Length / kStandardFeatureLength);
1744 } /* GetCharNormFeature */
1745 
1746 // Computes the char_norm_array for the unicharset and, if not NULL, the
1747 // pruner_array as appropriate according to the existence of the shape_table.
// Without a shape table the pruner array is computed the same way as the
// char-norm array. With one, each class entry is the minimum char-norm value
// over every unichar in every shape referenced by the class's font set.
// Frees norm_feature before returning, so the caller must not use it again.
// NOTE(review): the embedded line numbers skip 1748, so the line naming the
// function and declaring norm_feature is missing from this listing.
1749  INT_TEMPLATES_STRUCT* templates,
1750  uinT8* char_norm_array,
1751  uinT8* pruner_array) {
1752  ComputeIntCharNormArray(*norm_feature, char_norm_array);
1753  if (pruner_array != NULL) {
1754  if (shape_table_ == NULL) {
1755  ComputeIntCharNormArray(*norm_feature, pruner_array);
1756  } else {
// Start every class at the maximum so the MIN accumulation below works.
1757  memset(pruner_array, MAX_UINT8,
1758  templates->NumClasses * sizeof(pruner_array[0]));
1759  // Each entry in the pruner norm array is the MIN of all the entries of
1760  // the corresponding unichars in the CharNormArray.
1761  for (int id = 0; id < templates->NumClasses; ++id) {
1762  int font_set_id = templates->Class[id]->font_set_id;
1763  const FontSet &fs = fontset_table_.get(font_set_id);
1764  for (int config = 0; config < fs.size; ++config) {
// Here the font set's config entries are used as shape_table_ indices.
1765  const Shape& shape = shape_table_->GetShape(fs.configs[config]);
1766  for (int c = 0; c < shape.size(); ++c) {
1767  if (char_norm_array[shape[c].unichar_id] < pruner_array[id])
1768  pruner_array[id] = char_norm_array[shape[c].unichar_id];
1769  }
1770  }
1771  }
1772  }
1773  }
1774  FreeFeature(norm_feature);
1775 }
1776 
1777 /*---------------------------------------------------------------------------*/
// Creates a new temporary config for ClassId in Templates from the given
// features. Existing protos that match well are reused (FindGoodProtos); new
// temporary protos are created for the features still unexplained
// (FindBadFeatures + MakeNewTempProtos).
// Returns the id of the new config, or -1 (after incrementing
// NumAdaptationsFailed) when the class already has MAX_NUM_CONFIGS configs or
// no more protos can be added.
// NOTE(review): the embedded line numbers skip 1778-1792, 1808, 1812, 1814,
// 1821, 1841, 1848 and 1859, so the header comment, the line naming the
// function, the declaration of Config, the conditions guarding debug output
// and one FindBadFeatures argument are missing from this listing.
1793  CLASS_ID ClassId,
1794  int FontinfoId,
1795  int NumFeatures,
1796  INT_FEATURE_ARRAY Features,
1797  FEATURE_SET FloatFeatures) {
1798  INT_CLASS IClass;
1799  ADAPT_CLASS Class;
1800  PROTO_ID OldProtos[MAX_NUM_PROTOS];
1801  FEATURE_ID BadFeatures[MAX_NUM_INT_FEATURES];
1802  int NumOldProtos;
1803  int NumBadFeatures;
1804  int MaxProtoId, OldMaxProtoId;
// BlobLength stays 0 in the visible code and is passed as-is to both
// FindGoodProtos and FindBadFeatures.
1805  int BlobLength = 0;
1806  int MaskSize;
1807  int ConfigId;
1809  int i;
1810  int debug_level = NO_DEBUG;
1811 
1813  debug_level =
1815 
1816  IClass = ClassForClassId(Templates->Templates, ClassId);
1817  Class = Templates->Class[ClassId];
1818 
1819  if (IClass->NumConfigs >= MAX_NUM_CONFIGS) {
1820  ++NumAdaptationsFailed;
1822  cprintf("Cannot make new temporary config: maximum number exceeded.\n");
1823  return -1;
1824  }
1825 
// Remembered so the debug message at the end can report how many protos were
// newly added.
1826  OldMaxProtoId = IClass->NumProtos - 1;
1827 
1828  NumOldProtos = im_.FindGoodProtos(IClass, AllProtosOn, AllConfigsOff,
1829  BlobLength, NumFeatures, Features,
1830  OldProtos, classify_adapt_proto_threshold,
1831  debug_level);
1832 
// TempProtoMask starts out holding exactly the reused old protos.
1833  MaskSize = WordsInVectorOfSize(MAX_NUM_PROTOS);
1834  zero_all_bits(TempProtoMask, MaskSize);
1835  for (i = 0; i < NumOldProtos; i++)
1836  SET_BIT(TempProtoMask, OldProtos[i]);
1837 
1838  NumBadFeatures = im_.FindBadFeatures(IClass, TempProtoMask, AllConfigsOn,
1839  BlobLength, NumFeatures, Features,
1840  BadFeatures,
1842  debug_level);
1843 
// MakeNewTempProtos also sets the bit of every proto it creates in
// TempProtoMask.
1844  MaxProtoId = MakeNewTempProtos(FloatFeatures, NumBadFeatures, BadFeatures,
1845  IClass, Class, TempProtoMask);
1846  if (MaxProtoId == NO_PROTO) {
1847  ++NumAdaptationsFailed;
1849  cprintf("Cannot make new temp protos: maximum number exceeded.\n");
1850  return -1;
1851  }
1852 
1853  ConfigId = AddIntConfig(IClass);
1854  ConvertConfig(TempProtoMask, ConfigId, IClass);
1855  Config = NewTempConfig(MaxProtoId, FontinfoId);
1856  TempConfigFor(Class, ConfigId) = Config;
1857  copy_all_bits(TempProtoMask, Config->Protos, Config->ProtoVectorSize);
1858 
1860  cprintf("Making new temp config %d fontinfo id %d"
1861  " using %d old and %d new protos.\n",
1862  ConfigId, Config->FontinfoId,
1863  NumOldProtos, MaxProtoId - OldMaxProtoId);
1864 
1865  return ConfigId;
1866 } /* MakeNewTemporaryConfig */
1867 
1868 /*---------------------------------------------------------------------------*/
// Turns the bad features listed in BadFeat into new temporary protos for
// IClass / Class. Consecutive bad features are grouped into one segment while
// they stay close in direction and position; each segment becomes one proto,
// whose bit is set in TempProtoMask and which is pushed onto Class->TempProtos.
// Returns the highest proto id in IClass afterwards (NumProtos - 1), or
// NO_PROTO as soon as AddIntProto fails.
// NOTE(review): on that failure, protos added for earlier segments are not
// rolled back in the visible code.
// NOTE(review): the embedded line numbers skip 1869-1889 and 1958, so the
// header comment, the line naming the function (which declares Features) and
// the last argument of AddProtoToProtoPruner are missing from this listing.
1890  int NumBadFeat,
1891  FEATURE_ID BadFeat[],
1892  INT_CLASS IClass,
1893  ADAPT_CLASS Class,
1894  BIT_VECTOR TempProtoMask) {
1895  FEATURE_ID *ProtoStart;
1896  FEATURE_ID *ProtoEnd;
1897  FEATURE_ID *LastBad;
1898  TEMP_PROTO TempProto;
1899  PROTO Proto;
1900  FEATURE F1, F2;
1901  FLOAT32 X1, X2, Y1, Y2;
1902  FLOAT32 A1, A2, AngleDelta;
1903  FLOAT32 SegmentLength;
1904  PROTO_ID Pid;
1905 
// Outer loop: one iteration per segment; [ProtoStart, ProtoEnd) is the run of
// bad features merged into it. LastBad is one past the final bad feature.
1906  for (ProtoStart = BadFeat, LastBad = ProtoStart + NumBadFeat;
1907  ProtoStart < LastBad; ProtoStart = ProtoEnd) {
1908  F1 = Features->Features[*ProtoStart];
1909  X1 = F1->Params[PicoFeatX];
1910  Y1 = F1->Params[PicoFeatY];
1911  A1 = F1->Params[PicoFeatDir];
1912 
// Inner loop: extend the segment one feature at a time; SegmentLength grows by
// one pico-feature length per feature included.
1913  for (ProtoEnd = ProtoStart + 1,
1914  SegmentLength = GetPicoFeatureLength();
1915  ProtoEnd < LastBad;
1916  ProtoEnd++, SegmentLength += GetPicoFeatureLength()) {
1917  F2 = Features->Features[*ProtoEnd];
1918  X2 = F2->Params[PicoFeatX];
1919  Y2 = F2->Params[PicoFeatY];
1920  A2 = F2->Params[PicoFeatDir];
1921 
// Differences above 0.5 are folded back as 1.0 - delta; presumably directions
// are expressed as a fraction of a full turn - confirm.
1922  AngleDelta = fabs(A1 - A2);
1923  if (AngleDelta > 0.5)
1924  AngleDelta = 1.0 - AngleDelta;
1925 
// Stop extending once the candidate differs too much in direction or lies
// further from the start feature than the segment length so far.
1926  if (AngleDelta > matcher_clustering_max_angle_delta ||
1927  fabs(X1 - X2) > SegmentLength ||
1928  fabs(Y1 - Y2) > SegmentLength)
1929  break;
1930  }
1931 
// Re-read the last feature actually included in the segment.
1932  F2 = Features->Features[*(ProtoEnd - 1)];
1933  X2 = F2->Params[PicoFeatX];
1934  Y2 = F2->Params[PicoFeatY];
1935  A2 = F2->Params[PicoFeatDir];
1936 
1937  Pid = AddIntProto(IClass);
1938  if (Pid == NO_PROTO)
1939  return (NO_PROTO);
1940 
1941  TempProto = NewTempProto();
1942  Proto = &(TempProto->Proto);
1943 
1944  /* compute proto params - NOTE that Y_DIM_OFFSET must be used because
1945  ConvertProto assumes that the Y dimension varies from -0.5 to 0.5
1946  instead of the -0.25 to 0.75 used in baseline normalization */
// The proto takes the direction of the first feature and is centred midway
// between the first and last features of the segment.
1947  Proto->Length = SegmentLength;
1948  Proto->Angle = A1;
1949  Proto->X = (X1 + X2) / 2.0;
1950  Proto->Y = (Y1 + Y2) / 2.0 - Y_DIM_OFFSET;
1951  FillABC(Proto);
1952 
1953  TempProto->ProtoId = Pid;
1954  SET_BIT(TempProtoMask, Pid);
1955 
1956  ConvertProto(Proto, Pid, IClass);
1957  AddProtoToProtoPruner(Proto, Pid, IClass,
1959 
1960  Class->TempProtos = push(Class->TempProtos, TempProto);
1961  }
1962  return IClass->NumProtos - 1;
1963 } /* MakeNewTempProtos */
1964 
1965 /*---------------------------------------------------------------------------*/
// Promotes the temporary config ConfigId of class ClassId in Templates to a
// permanent config: updates the permanent counters, builds the permanent
// config record (ambiguities computed from Blob plus the temp config's
// FontinfoId), makes the temp protos used by the config permanent, and stores
// the record in the class.
// NOTE(review): the embedded line numbers skip 1966-1978, 1983, 1997 and 2008,
// so the header comment, the line naming the function, the declaration of
// Config, the start of the statement that allocates Perm, and one statement
// after the delete_d call are missing from this listing.
1979  CLASS_ID ClassId,
1980  int ConfigId,
1981  TBLOB *Blob) {
1982  UNICHAR_ID *Ambigs;
1984  ADAPT_CLASS Class;
1985  PROTO_KEY ProtoKey;
1986 
1987  Class = Templates->Class[ClassId];
1988  Config = TempConfigFor(Class, ConfigId);
1989 
1990  MakeConfigPermanent(Class, ConfigId);
// The first permanent config of a class also makes the class itself count as
// permanent.
1991  if (Class->NumPermConfigs == 0)
1992  Templates->NumPermClasses++;
1993  Class->NumPermConfigs++;
1994 
1995  // Initialize permanent config.
1996  Ambigs = GetAmbiguities(Blob, ClassId);
1998  "PERM_CONFIG_STRUCT");
1999  Perm->Ambigs = Ambigs;
2000  Perm->FontinfoId = Config->FontinfoId;
2001 
2002  // Free memory associated with temporary config (since ADAPTED_CONFIG
2003  // is a union we need to clean up before we record permanent config).
// MakeTempProtoPerm is applied to the TempProtos list through delete_d, with
// ProtoKey identifying the config being promoted.
2004  ProtoKey.Templates = Templates;
2005  ProtoKey.ClassId = ClassId;
2006  ProtoKey.ConfigId = ConfigId;
2007  Class->TempProtos = delete_d(Class->TempProtos, &ProtoKey, MakeTempProtoPerm);
2009 
2010  // Record permanent config.
2011  PermConfigFor(Class, ConfigId) = Perm;
2012 
2013  if (classify_learning_debug_level >= 1) {
2014  tprintf("Making config %d for %s (ClassId %d) permanent:"
2015  " fontinfo id %d, ambiguities '",
2016  ConfigId, getDict().getUnicharset().debug_str(ClassId).string(),
2017  ClassId, PermConfigFor(Class, ConfigId)->FontinfoId);
// NOTE(review): Ambigs is dereferenced without a NULL check; GetAmbiguities
// returns NULL when it cannot build a training sample - confirm unreachable.
2018  for (UNICHAR_ID *AmbigsPointer = Ambigs;
2019  *AmbigsPointer >= 0; ++AmbigsPointer)
2020  tprintf("%s", unicharset.id_to_unichar(*AmbigsPointer));
2021  tprintf("'.\n");
2022  }
2023 } /* MakePermanent */
2024 } // namespace tesseract
2025 
2026 /*---------------------------------------------------------------------------*/
2041 int MakeTempProtoPerm(void *item1, void *item2) {
2042  ADAPT_CLASS Class;
2044  TEMP_PROTO TempProto;
2045  PROTO_KEY *ProtoKey;
2046 
2047  TempProto = (TEMP_PROTO) item1;
2048  ProtoKey = (PROTO_KEY *) item2;
2049 
2050  Class = ProtoKey->Templates->Class[ProtoKey->ClassId];
2051  Config = TempConfigFor(Class, ProtoKey->ConfigId);
2052 
2053  if (TempProto->ProtoId > Config->MaxProtoId ||
2054  !test_bit (Config->Protos, TempProto->ProtoId))
2055  return FALSE;
2056 
2057  MakeProtoPermanent(Class, TempProto->ProtoId);
2058  AddProtoToClassPruner(&(TempProto->Proto), ProtoKey->ClassId,
2059  ProtoKey->Templates->Templates);
2060  FreeTempProto(TempProto);
2061 
2062  return TRUE;
2063 } /* MakeTempProtoPerm */
2064 
2065 /*---------------------------------------------------------------------------*/
2066 namespace tesseract {
// Prints every entry of results.match: the unicharset debug string of its
// unichar id followed by the entry's own Print() output.
// NOTE(review): the embedded line numbers skip 2067-2077, so the original
// header comment and the signature are missing from this listing.
2078  for (int i = 0; i < results.match.size(); ++i) {
2079  tprintf("%s ", unicharset.debug_str(results.match[i].unichar_id).string());
2080  results.match[i].Print();
2081  }
2082 } /* PrintAdaptiveMatchResults */
2083 
2084 /*---------------------------------------------------------------------------*/
// Compacts Results->match in place, keeping only matches whose rating is at
// least best_rating - matcher_bad_match_pad, then truncates the vector.
// In the first branch (its condition is on a dropped line) surviving
// alphabetic matches are filtered further: roman-numeral letters are kept,
// "l" is rewritten to "1" and "O" to "0" when that digit itself scored below
// the threshold, and every other alpha match is discarded.
// NOTE(review): the embedded line numbers skip 2085-2100 and 2106, so the
// header comment, the signature and the `if` that pairs with the `else` below
// are missing from this listing.
2101  int Next, NextGood;
2102  FLOAT32 BadMatchThreshold;
2103  static const char* romans = "i v x I V X";
2104  BadMatchThreshold = Results->best_rating - matcher_bad_match_pad;
2105 
// -1 stands in for the digit when the unicharset does not contain it.
2107  UNICHAR_ID unichar_id_one = unicharset.contains_unichar("1") ?
2108  unicharset.unichar_to_id("1") : -1;
2109  UNICHAR_ID unichar_id_zero = unicharset.contains_unichar("0") ?
2110  unicharset.unichar_to_id("0") : -1;
2111  float scored_one = ScoredUnichar(unichar_id_one, *Results);
2112  float scored_zero = ScoredUnichar(unichar_id_zero, *Results);
2113 
// Next scans every match; NextGood is the write position for the kept ones.
2114  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2115  const UnicharRating& match = Results->match[Next];
2116  if (match.rating >= BadMatchThreshold) {
// Non-alpha matches and roman-numeral letters pass through unchanged.
2117  if (!unicharset.get_isalpha(match.unichar_id) ||
2118  strstr(romans,
2119  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2120  } else if (unicharset.eq(match.unichar_id, "l") &&
2121  scored_one < BadMatchThreshold) {
2122  Results->match[Next].unichar_id = unichar_id_one;
2123  } else if (unicharset.eq(match.unichar_id, "O") &&
2124  scored_zero < BadMatchThreshold) {
2125  Results->match[Next].unichar_id = unichar_id_zero;
2126  } else {
2127  Results->match[Next].unichar_id = INVALID_UNICHAR_ID; // Don't copy.
2128  }
// NOTE(review): if the unicharset lacks "1"/"0" the rewritten id is -1; this
// test only drops it if INVALID_UNICHAR_ID is also -1 - confirm.
2129  if (Results->match[Next].unichar_id != INVALID_UNICHAR_ID) {
2130  if (NextGood == Next) {
2131  ++NextGood;
2132  } else {
2133  Results->match[NextGood++] = Results->match[Next];
2134  }
2135  }
2136  }
2137  }
2138  } else {
// Plain threshold filter with the same in-place compaction.
2139  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2140  if (Results->match[Next].rating >= BadMatchThreshold) {
2141  if (NextGood == Next) {
2142  ++NextGood;
2143  } else {
2144  Results->match[NextGood++] = Results->match[Next];
2145  }
2146  }
2147  }
2148  }
2149  Results->match.truncate(NextGood);
2150 } /* RemoveBadMatches */
2151 
2152 /*----------------------------------------------------------------------------*/
// Compacts Results->match in place so that at most two matches from
// punc_chars and at most one match from digit_chars survive; all other
// matches are kept. The vector is truncated to the kept entries.
// NOTE(review): strstr is a substring test, so any unichar whose string
// occurs inside one of the tables (for instance a single space, which
// separates the entries) also counts as a member - confirm intended.
// NOTE(review): the embedded line numbers skip 2153-2162, so the original
// header comment and the signature are missing from this listing.
2163  int Next, NextGood;
2164  int punc_count; /*no of garbage characters */
2165  int digit_count;
2166  /*garbage characters */
2167  static char punc_chars[] = ". , ; : / ` ~ ' - = \\ | \" ! _ ^";
2168  static char digit_chars[] = "0 1 2 3 4 5 6 7 8 9";
2169 
2170  punc_count = 0;
2171  digit_count = 0;
// Next scans every match; NextGood is the write position for the kept ones.
2172  for (Next = NextGood = 0; Next < Results->match.size(); Next++) {
2173  const UnicharRating& match = Results->match[Next];
2174  bool keep = true;
2175  if (strstr(punc_chars,
2176  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
// The counter advances even for dropped entries, so only the first two
// punctuation matches are ever kept.
2177  if (punc_count >= 2)
2178  keep = false;
2179  punc_count++;
2180  } else {
2181  if (strstr(digit_chars,
2182  unicharset.id_to_unichar(match.unichar_id)) != NULL) {
2183  if (digit_count >= 1)
2184  keep = false;
2185  digit_count++;
2186  }
2187  }
2188  if (keep) {
2189  if (NextGood == Next) {
2190  ++NextGood;
2191  } else {
2192  Results->match[NextGood++] = match;
2193  }
2194  }
2195  }
2196  Results->match.truncate(NextGood);
2197 } /* RemoveExtraPuncs */
2198 
2199 /*---------------------------------------------------------------------------*/
// Remaps Threshold (to 0.9 when it equals matcher_good_threshold, otherwise to
// 1.0 - Threshold), scales it by 255 and clips it to [0, 255]; the same
// clipped value is passed to two calls.
// NOTE(review): the embedded line numbers skip 2200-2213, 2215 and 2217, so
// the header comment, the signature and the heads of both calls that receive
// the clipped value are missing from this listing.
2214  Threshold = (Threshold == matcher_good_threshold) ? 0.9: (1.0 - Threshold);
2216  ClipToRange<int>(255 * Threshold, 0, 255));
2218  ClipToRange<int>(255 * Threshold, 0, 255));
2219 } /* SetAdaptiveThreshold */
2220 
2221 /*---------------------------------------------------------------------------*/
// Debug display of the best static match for shape_id against the given
// features. Compiled to an empty body when GRAPHICS_DISABLED is defined.
// Prints a message and returns early if PreTrainedTemplates has no class for
// shape_id or num_features is not positive.
// NOTE(review): the embedded line numbers skip 2249-2250, 2252-2253, 2259 and
// 2262-2265, so the heads and trailing arguments of both matcher calls are
// missing from this listing.
2234 void Classify::ShowBestMatchFor(int shape_id,
2235  const INT_FEATURE_STRUCT* features,
2236  int num_features) {
2237 #ifndef GRAPHICS_DISABLED
2238  uinT32 config_mask;
2239  if (UnusedClassIdIn(PreTrainedTemplates, shape_id)) {
2240  tprintf("No built-in templates for class/shape %d\n", shape_id);
2241  return;
2242  }
2243  if (num_features <= 0) {
2244  tprintf("Illegal blob (char norm features)!\n");
2245  return;
2246  }
2247  UnicharRating cn_result;
// Side effect: changes the classify_norm_method parameter; it is not restored
// in the visible code.
2248  classify_norm_method.set_value(character);
2251  num_features, features, &cn_result,
2254  tprintf("\n");
// Single-bit mask selecting only the config reported in cn_result; it is
// passed to the second call below as its config mask.
2255  config_mask = 1 << cn_result.config;
2256 
2257  tprintf("Static Shape ID: %d\n", shape_id);
2258  ShowMatchDisplay();
2260  AllProtosOn, reinterpret_cast<BIT_VECTOR>(&config_mask),
2261  num_features, features, &cn_result,
2266 #endif // GRAPHICS_DISABLED
2267 } /* ShowBestMatchFor */
2269 // Returns a string for the classifier class_id: either the corresponding
2270 // unicharset debug_str or the shape_table_ debug str.
// The shape_table_ string is used only when templates is PreTrainedTemplates
// and a shape table is loaded; config_id is used only in that case, to look
// up the shape id.
// NOTE(review): the embedded line numbers skip 2271, so the line naming the
// function and declaring templates is missing from this listing.
2272  int class_id, int config_id) const {
2273  STRING class_string;
2274  if (templates == PreTrainedTemplates && shape_table_ != NULL) {
2275  int shape_id = ClassAndConfigIDToFontOrShapeID(class_id, config_id);
2276  class_string = shape_table_->DebugStr(shape_id);
2277  } else {
2278  class_string = unicharset.debug_str(class_id);
2279  }
2280  return class_string;
2281 }
2282 
2283 // Converts a classifier class_id index to a shape_table_ index
// The result is entry int_result_config of the font set belonging to
// PreTrainedTemplates->Class[class_id]. Returns kBlankFontinfoId when the
// class has no font set (font_set_id < 0). Asserts that int_result_config is
// a valid index into the font set.
// NOTE(review): the embedded line numbers skip 2284, so the line naming the
// function and declaring class_id is missing from this listing.
2285  int int_result_config) const {
2286  int font_set_id = PreTrainedTemplates->Class[class_id]->font_set_id;
2287  // Older inttemps have no font_ids.
2288  if (font_set_id < 0)
2289  return kBlankFontinfoId;
2290  const FontSet &fs = fontset_table_.get(font_set_id);
2291  ASSERT_HOST(int_result_config >= 0 && int_result_config < fs.size);
2292  return fs.configs[int_result_config];
2293 }
2294 
2295 // Converts a shape_table_ index to a classifier class_id index (not a
2296 // unichar-id!). Uses a search, so not fast.
2297 int Classify::ShapeIDToClassID(int shape_id) const {
2298  for (int id = 0; id < PreTrainedTemplates->NumClasses; ++id) {
2299  int font_set_id = PreTrainedTemplates->Class[id]->font_set_id;
2300  ASSERT_HOST(font_set_id >= 0);
2301  const FontSet &fs = fontset_table_.get(font_set_id);
2302  for (int config = 0; config < fs.size; ++config) {
2303  if (fs.configs[config] == shape_id)
2304  return id;
2305  }
2306  }
2307  tprintf("Shape %d not found\n", shape_id);
2308  return -1;
2309 }
2310 
2311 // Returns true if the given TEMP_CONFIG is good enough to make it
2312 // a permanent config.
// Visible rules, in order: a first condition (on a dropped line) accepts
// immediately; a config seen fewer than matcher_min_examples_for_prototyping
// times is rejected; and, when use_ambigs_for_adaption is set, the config is
// rejected if any ambiguous class has no permanent config yet and has itself
// been seen too few times. Otherwise the config is accepted.
// NOTE(review): the embedded line numbers skip 2313, 2320, 2328 and 2335, so
// the line naming the function (which declares class_id), the first `if`
// condition, the expression that fetches the ambigs vector and the
// right-hand side of the MaxNumTimesSeen comparison are missing here.
2314  const TEMP_CONFIG &config) {
2315  if (classify_learning_debug_level >= 1) {
2316  tprintf("NumTimesSeen for config of %s is %d\n",
2317  getDict().getUnicharset().debug_str(class_id).string(),
2318  config->NumTimesSeen);
2319  }
2321  return true;
2322  } else if (config->NumTimesSeen < matcher_min_examples_for_prototyping) {
2323  return false;
2324  } else if (use_ambigs_for_adaption) {
2325  // Go through the ambigs vector and see whether we have already seen
2326  // enough times all the characters represented by the ambigs vector.
2327  const UnicharIdVector *ambigs =
// A NULL ambigs vector is treated as having no entries.
2329  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2330  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2331  ADAPT_CLASS ambig_class = AdaptedTemplates->Class[(*ambigs)[ambig]];
2332  assert(ambig_class != NULL);
2333  if (ambig_class->NumPermConfigs == 0 &&
2334  ambig_class->MaxNumTimesSeen <
2336  if (classify_learning_debug_level >= 1) {
2337  tprintf("Ambig %s has not been seen enough times,"
2338  " not making config for %s permanent\n",
2339  getDict().getUnicharset().debug_str(
2340  (*ambigs)[ambig]).string(),
2341  getDict().getUnicharset().debug_str(class_id).string());
2342  }
2343  return false;
2344  }
2345  }
2346  }
2347  return true;
2348 }
2349 
// For every class in the ambigs vector obtained for class_id, makes permanent
// each temporary config that exists and that TempConfigReliable accepts.
// Configs that are already permanent are skipped. Blob is passed on to
// MakePermanent.
// NOTE(review): the embedded line numbers skip 2350 and 2352, so the signature
// and the expression that fetches the ambigs vector are missing here.
2351  const UnicharIdVector *ambigs =
// A NULL ambigs vector is treated as having no entries.
2353  int ambigs_size = (ambigs == NULL) ? 0 : ambigs->size();
2354  if (classify_learning_debug_level >= 1) {
2355  tprintf("Running UpdateAmbigsGroup for %s class_id=%d\n",
2356  getDict().getUnicharset().debug_str(class_id).string(), class_id);
2357  }
2358  for (int ambig = 0; ambig < ambigs_size; ++ambig) {
2359  CLASS_ID ambig_class_id = (*ambigs)[ambig];
2360  const ADAPT_CLASS ambigs_class = AdaptedTemplates->Class[ambig_class_id];
// Every config slot is visited; unused temporary slots show up as NULL below.
2361  for (int cfg = 0; cfg < MAX_NUM_CONFIGS; ++cfg) {
2362  if (ConfigIsPermanent(ambigs_class, cfg)) continue;
2363  const TEMP_CONFIG config =
2364  TempConfigFor(AdaptedTemplates->Class[ambig_class_id], cfg);
2365  if (config != NULL && TempConfigReliable(ambig_class_id, config)) {
2366  if (classify_learning_debug_level >= 1) {
2367  tprintf("Making config %d of %s permanent\n", cfg,
2368  getDict().getUnicharset().debug_str(
2369  ambig_class_id).string());
2370  }
2371  MakePermanent(AdaptedTemplates, ambig_class_id, cfg, Blob);
2372  }
2373  }
2374  }
2375 }
2376 
2377 } // namespace tesseract
double matcher_rating_margin
Definition: classify.h:424
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
bool PiecesAllNatural(int start, int count) const
Definition: pageres.cpp:1072
void ComputeBest()
Definition: adaptmatch.cpp:99
int FindGoodProtos(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, PROTO_ID *ProtoArray, int AdaptProtoThreshold, int Debug)
Definition: intmatcher.cpp:557
void AddProtoToProtoPruner(PROTO Proto, int ProtoId, INT_CLASS Class, bool debug)
Definition: intproto.cpp:384
Pix * pix() const
Definition: normalis.h:248
UnicityTable< FontSet > fontset_table_
Definition: classify.h:496
bool classify_enable_adaptive_debugger
Definition: classify.h:414
inT64 GetEndOffset(TessdataType tessdata_type) const
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:761
int32_t * configs
Definition: fontinfo.h:141
#define copy_all_bits(source, dest, length)
Definition: bitvec.h:49
#define NO_DEBUG
Definition: adaptmatch.cpp:70
int MakeNewTemporaryConfig(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int FontinfoId, int NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_SET FloatFeatures)
double matcher_good_threshold
Definition: classify.h:420
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT16 NumProtos
Definition: intproto.h:108
double classify_character_fragments_garbage_certainty_threshold
Definition: classify.h:453
UNICHAR_ID * BaselineClassifier(TBLOB *Blob, const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, ADAPT_TEMPLATES Templates, ADAPT_RESULTS *Results)
double matcher_clustering_max_angle_delta
Definition: classify.h:432
void cprintf(const char *format,...)
Definition: callcpp.cpp:40
bool DeSerialize(bool swap, FILE *fp)
Definition: shapetable.cpp:256
bool eq(UNICHAR_ID unichar_id, const char *const unichar_repr) const
Definition: unicharset.cpp:656
const int kBlnBaselineOffset
Definition: normalis.h:29
void ComputeCharNormArrays(FEATURE_STRUCT *norm_feature, INT_TEMPLATES_STRUCT *templates, uinT8 *char_norm_array, uinT8 *pruner_array)
#define TRUE
Definition: capi.h:45
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
bool get_isalpha(UNICHAR_ID unichar_id) const
Definition: unicharset.h:449
void StartBackupAdaptiveClassifier()
Definition: adaptmatch.cpp:644
int IntCastRounded(double x)
Definition: helpers.h:172
void SettupStopperPass1()
Sets up stopper variables in preparation for the first pass.
Definition: stopper.cpp:369
short inT16
Definition: host.h:33
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
double certainty_scale
Definition: dict.h:609
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
bool classify_enable_learning
Definition: classify.h:389
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
LIST delete_d(LIST list, void *key, int_compare is_equal)
Definition: oldlist.cpp:120
bool LargeSpeckle(const TBLOB &blob)
Definition: classify.cpp:235
bool classify_enable_adaptive_matcher
Definition: classify.h:409
ADAPT_TEMPLATES AdaptedTemplates
Definition: classify.h:473
TWERD * rebuild_word
Definition: pageres.h:244
CharSegmentationType
Definition: classify.h:54
#define IncreaseConfidence(TempConfig)
Definition: adaptive.h:108
#define LENGTH_COMPRESSION
Definition: normfeat.h:26
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
void Init(tesseract::IntParam *classify_debug_level)
Definition: intmatcher.cpp:679
static void Update()
Definition: scrollview.cpp:715
#define PRINT_PROTO_MATCHES
Definition: intproto.h:194
inT32 length() const
Definition: strngs.cpp:196
float adjust_factor() const
Definition: ratngs.h:304
int size() const
Definition: unicharset.h:297
PROTO_ID MakeNewTempProtos(FEATURE_SET Features, int NumBadFeat, FEATURE_ID BadFeat[], INT_CLASS IClass, ADAPT_CLASS Class, BIT_VECTOR TempProtoMask)
void FreeFeature(FEATURE Feature)
Definition: ocrfeatures.cpp:60
void get_top_bottom(UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
Definition: unicharset.h:526
bool use_ambigs_for_adaption
Definition: ccutil.h:91
void LearnPieces(const char *fontname, int start, int length, float threshold, CharSegmentationType segmentation, const char *correct_text, WERD_RES *word)
Definition: adaptmatch.cpp:368
INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES]
Definition: intproto.h:155
void AddProtoToClassPruner(PROTO Proto, CLASS_ID ClassId, INT_TEMPLATES Templates)
Definition: intproto.cpp:342
void ClearCharNormArray(uinT8 *char_norm_array)
Definition: float2int.cpp:48
uinT16 ProtoId
Definition: adaptive.h:30
#define MAX_ADAPTABLE_WERD_SIZE
Definition: adaptmatch.cpp:71
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
STRING imagefile
Definition: ccutil.h:72
CLUSTERCONFIG Config
void DoAdaptiveMatch(TBLOB *Blob, ADAPT_RESULTS *Results)
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
void Match(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, inT16 NumFeatures, const INT_FEATURE_STRUCT *Features, tesseract::UnicharRating *Result, int AdaptFeatureThreshold, int Debug, bool SeparateDebugWindows)
Definition: intmatcher.cpp:475
#define MIN(x, y)
Definition: ndminx.h:28
static void SetupBLCNDenorms(const TBLOB &blob, bool nonlinear_norm, DENORM *bl_denorm, DENORM *cn_denorm, INT_FX_RESULT_STRUCT *fx_info)
Definition: intfx.cpp:133
double matcher_bad_match_pad
Definition: classify.h:423
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
Definition: ratngs.h:94
bool HasNonfragment
Definition: adaptmatch.cpp:84
inT16 PROTO_ID
Definition: matchdefs.h:41
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
GenericVector< ScoredFont > fonts
Definition: shapetable.h:88
void SettupStopperPass2()
Sets up stopper variables in preparation for the second pass.
Definition: stopper.cpp:373
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
BIT_VECTOR TempProtoMask
Definition: classify.h:483
void RefreshDebugWindow(ScrollView **win, const char *msg, int y_offset, const TBOX &wbox)
Definition: adaptmatch.cpp:220
const UNICHARSET & getUnicharset() const
Definition: dict.h:97
INT_TEMPLATES PreTrainedTemplates
Definition: classify.h:469
#define MAX_MATCHES
Definition: adaptmatch.cpp:68
double ComputeCorrectedRating(bool debug, int unichar_id, double cp_rating, double im_rating, int feature_misses, int bottom, int top, int blob_length, int matcher_multiplier, const uinT8 *cn_factors)
void ResetAdaptiveClassifierInternal()
Definition: adaptmatch.cpp:613
void LearnWord(const char *fontname, WERD_RES *word)
Definition: adaptmatch.cpp:244
char * classify_learn_debug_str
Definition: classify.h:459
double tessedit_class_miss_scale
Definition: classify.h:439
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
unsigned char uinT8
Definition: host.h:32
int ShapeIDToClassID(int shape_id) const
void ReadNewCutoffs(FILE *CutoffFile, bool swap, inT64 end_offset, CLASS_CUTOFF_ARRAY Cutoffs)
Definition: cutoffs.cpp:52
bool prioritize_division
Definition: classify.h:387
void EndDangerousAmbigs()
Definition: stopper.cpp:367
FLOAT32 Angle
Definition: protos.h:49
WERD_CHOICE * best_choice
Definition: pageres.h:219
TBOX bounding_box() const
Definition: blobs.cpp:482
BIT_VECTOR PermProtos
Definition: adaptive.h:68
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
int classify_adapt_feature_threshold
Definition: classify.h:447
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
#define set_all_bits(array, length)
Definition: bitvec.h:41
int matcher_permanent_classes_min
Definition: classify.h:426
const double kStandardFeatureLength
Definition: intfx.h:46
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
FLOAT32 X
Definition: protos.h:47
#define MAX_NUM_PROTOS
Definition: intproto.h:47
int push_back(T object)
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
bool classify_use_pre_adapted_templates
Definition: classify.h:411
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
virtual void DebugDisplay(const TrainingSample &sample, Pix *page_pix, UNICHAR_ID unichar_id)
FLOAT32 Length
Definition: protos.h:50
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
TrainingSample * BlobToTrainingSample(const TBLOB &blob, bool nonlinear_norm, INT_FX_RESULT_STRUCT *fx_info, GenericVector< INT_FEATURE_STRUCT > *bl_features)
Definition: intfx.cpp:81
PROTO_STRUCT Proto
Definition: adaptive.h:32
void UpdateMatchDisplay()
Definition: intproto.cpp:467
double matcher_perfect_threshold
Definition: classify.h:422
void MakePermanent(ADAPT_TEMPLATES Templates, CLASS_ID ClassId, int ConfigId, TBLOB *Blob)
int AddIntProto(INT_CLASS Class)
Definition: intproto.cpp:293
UnicityTable< FontInfo > fontinfo_table_
Definition: classify.h:488
TessdataManager tessdata_manager
Definition: ccutil.h:69
bool LooksLikeGarbage(TBLOB *blob)
#define ClassForClassId(T, c)
Definition: intproto.h:181
#define MAX_FLOAT32
Definition: host.h:57
void RemoveExtraPuncs(ADAPT_RESULTS *Results)
bool classify_debug_character_fragments
Definition: classify.h:455
#define UNLIKELY_NUM_FEAT
Definition: adaptmatch.cpp:69
inT16 bottom() const
Definition: rect.h:61
int matcher_sufficient_examples_for_prototyping
Definition: classify.h:430
TEMP_PROTO_STRUCT * TEMP_PROTO
Definition: adaptive.h:37
const char * string() const
Definition: strngs.cpp:201
#define MAX_UINT8
Definition: host.h:54
int GetCharNormFeature(const INT_FX_RESULT_STRUCT &fx_info, INT_TEMPLATES templates, uinT8 *pruner_norm_array, uinT8 *char_norm_array)
void FillABC(PROTO Proto)
Definition: protos.cpp:198
#define SET_BIT(array, bit)
Definition: bitvec.h:57
const FontInfo * fontinfo
Definition: pageres.h:288
NORM_PROTOS * ReadNormProtos(FILE *File, inT64 end_offset)
Definition: normmatch.cpp:245
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:369
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
void AddNewResult(const UnicharRating &new_result, ADAPT_RESULTS *results)
#define MAX_NUM_INT_FEATURES
Definition: intproto.h:132
FEATURE_SET ExtractOutlineFeatures(TBLOB *Blob)
Definition: outfeat.cpp:47
void plot(ScrollView *window)
Definition: blobs.cpp:918
uinT8 NumPermConfigs
Definition: adaptive.h:65
void ExpandShapesAndApplyCorrections(ADAPT_CLASS *classes, bool debug, int class_id, int bottom, int top, float cp_rating, int blob_length, int matcher_multiplier, const uinT8 *cn_factors, UnicharRating *int_result, ADAPT_RESULTS *final_results)
#define FALSE
Definition: capi.h:46
void InitMatcherRatings(register FLOAT32 *Rating)
GenericVector< CP_RESULT_STRUCT > CPResults
Definition: adaptmatch.cpp:89
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
#define MakeProtoPermanent(Class, ProtoId)
Definition: adaptive.h:99
void plot(ScrollView *window, ScrollView::Color color, ScrollView::Color child_color)
Definition: blobs.cpp:524
void RemoveBadMatches(ADAPT_RESULTS *Results)
const UnicharIdVector * AmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:192
void UpdateAmbigsGroup(CLASS_ID class_id, TBLOB *Blob)
uinT8 NumTimesSeen
Definition: adaptive.h:41
bool matcher_debug_separate_windows
Definition: classify.h:458
uinT8 FEATURE_ID
Definition: matchdefs.h:47
void LearnBlob(const STRING &fontname, TBLOB *Blob, const DENORM &cn_denorm, const INT_FX_RESULT_STRUCT &fx_info, const char *blob_text)
Definition: blobclass.cpp:69
UNICHAR_ID best_unichar_id
Definition: adaptmatch.cpp:85
double classify_adapted_pruning_factor
Definition: classify.h:441
const UnicharAmbigs & getUnicharAmbigs() const
Definition: dict.h:103
float FLOAT32
Definition: host.h:44
BIT_VECTOR AllProtosOn
Definition: classify.h:480
void AdaptiveClassifier(TBLOB *Blob, BLOB_CHOICE_LIST *Choices)
Definition: adaptmatch.cpp:185
STRING debug_str(UNICHAR_ID id) const
Definition: unicharset.cpp:318
inT16 left() const
Definition: rect.h:68
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:739
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
Definition: ratngs.cpp:819
int CharNormClassifier(TBLOB *blob, const TrainingSample &sample, ADAPT_RESULTS *adapt_results)
TWERD * chopped_word
Definition: pageres.h:201
float ApplyCNCorrection(float rating, int blob_length, int normalization_factor, int matcher_multiplier)
const int kBlnXHeight
Definition: normalis.h:28
#define NO_PROTO
Definition: matchdefs.h:42
double classify_misfit_junk_penalty
Definition: classify.h:435
TBOX bounding_box() const
Definition: blobs.cpp:881
INT_TEMPLATES Templates
Definition: adaptive.h:77
const DENORM & denorm() const
Definition: blobs.h:340
int AddIntConfig(INT_CLASS Class)
Definition: intproto.cpp:270
FEATURE_SET ExtractPicoFeatures(TBLOB *Blob)
Definition: picofeat.cpp:67
FLOAT32 ActualOutlineLength(FEATURE Feature)
Definition: normfeat.cpp:32
TBLOB * ClassifyNormalizeIfNeeded() const
Definition: blobs.cpp:363
bool classify_nonlinear_norm
Definition: classify.h:416
void Initialize()
Definition: adaptmatch.cpp:93
Dict & getDict()
Definition: classify.h:65
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
int ClassAndConfigIDToFontOrShapeID(int class_id, int int_result_config) const
int best_match_index
Definition: adaptmatch.cpp:86
void truncate(int size)
int length() const
Definition: ratngs.h:301
void SetAdaptiveThreshold(FLOAT32 Threshold)
#define MF_SCALE_FACTOR
Definition: mfoutline.h:63
CLASS_ID ClassId
Definition: adaptmatch.cpp:115
ADAPT_TEMPLATES Templates
Definition: adaptmatch.cpp:114
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
BIT_VECTOR AllConfigsOn
Definition: classify.h:481
int classify_adapt_proto_threshold
Definition: classify.h:445
bool AlternativeChoiceAdjustmentsWorseThan(float threshold) const
Definition: pageres.cpp:430
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
#define MAX(x, y)
Definition: ndminx.h:24
int get_script(UNICHAR_ID unichar_id) const
Definition: unicharset.h:611
const UnicharIdVector * ReverseAmbigsForAdaption(UNICHAR_ID unichar_id) const
Definition: ambigs.h:201
int inT32
Definition: host.h:35
int matcher_min_examples_for_prototyping
Definition: classify.h:428
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
int NumBlobs() const
Definition: blobs.h:425
GenericVector< STRING > correct_text
Definition: pageres.h:259
void ConvertProto(PROTO Proto, int ProtoId, INT_CLASS Class)
Definition: intproto.cpp:516
int PruneClasses(const INT_TEMPLATES_STRUCT *int_templates, int num_features, int keep_this, const INT_FEATURE_STRUCT *features, const uinT8 *normalization_factors, const uinT16 *expected_num_features, GenericVector< CP_RESULT_STRUCT > *results)
Definition: intmatcher.cpp:412
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_INT32
Definition: host.h:53
int classify_learning_debug_level
Definition: classify.h:419
double segment_penalty_dict_case_ok
Definition: dict.h:582
Definition: strngs.h:44
void InitIntegerFX()
Definition: intfx.cpp:55
ShapeTable * shape_table_
Definition: classify.h:512
int CharNormTrainingSample(bool pruner_only, int keep_this, const TrainingSample &sample, GenericVector< UnicharRating > *results)
int size() const
Definition: genericvector.h:72
bool disable_character_fragments
Definition: classify.h:450
double classify_adapted_pruning_threshold
Definition: classify.h:443
void AddLargeSpeckleTo(int blob_length, BLOB_CHOICE_LIST *choices)
Definition: classify.cpp:212
void MasterMatcher(INT_TEMPLATES templates, inT16 num_features, const INT_FEATURE_STRUCT *features, const uinT8 *norm_factors, ADAPT_CLASS *classes, int debug, int matcher_multiplier, const TBOX &blob_box, const GenericVector< CP_RESULT_STRUCT > &results, ADAPT_RESULTS *final_results)
int classify_integer_matcher_multiplier
Definition: classify.h:469
#define PRINT_MATCH_SUMMARY
Definition: intproto.h:190
Definition: blobs.h:261
int size() const
Definition: shapetable.h:202
FLOAT32 Y
Definition: protos.h:48
void DisplayAdaptedChar(TBLOB *blob, INT_CLASS_STRUCT *int_class)
Definition: adaptmatch.cpp:979
FEATURE NewFeature(const FEATURE_DESC_STRUCT *FeatureDesc)
Definition: ocrfeatures.cpp:94
inT16 top() const
Definition: rect.h:54
BIT_VECTOR AllConfigsOff
Definition: classify.h:482
const Shape & GetShape(int shape_id) const
Definition: shapetable.h:323
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
inT32 BlobLength
Definition: adaptmatch.cpp:83
int FindBadFeatures(INT_CLASS ClassTemplate, BIT_VECTOR ProtoMask, BIT_VECTOR ConfigMask, uinT16 BlobLength, inT16 NumFeatures, INT_FEATURE_ARRAY Features, FEATURE_ID *FeatureArray, int AdaptFeatureThreshold, int Debug)
Definition: intmatcher.cpp:627
STRING to_string() const
Definition: unicharset.h:73
void FreeTempProto(void *arg)
Definition: adaptive.cpp:90
void EndAdaptiveClassifier()
Definition: adaptmatch.cpp:456
STRING language_data_path_prefix
Definition: ccutil.h:68
int length() const
Definition: genericvector.h:79
int GetAdaptiveFeatures(TBLOB *Blob, INT_FEATURE_ARRAY IntFeatures, FEATURE_SET *FloatFeatures)
Definition: adaptmatch.cpp:812
#define test_bit(array, bit)
Definition: bitvec.h:61
Definition: cluster.h:32
bool TempConfigReliable(CLASS_ID class_id, const TEMP_CONFIG &config)
void ConvertConfig(BIT_VECTOR Config, int ConfigId, INT_CLASS Class)
Definition: intproto.cpp:487
bool SeekToStart(TessdataType tessdata_type)
void DebugAdaptiveClassifier(TBLOB *Blob, ADAPT_RESULTS *Results)
unsigned int uinT32
Definition: host.h:36
#define ADAPT_TEMPLATE_SUFFIX
Definition: adaptmatch.cpp:66
void PrintAdaptiveMatchResults(const ADAPT_RESULTS &results)
#define MakeConfigPermanent(Class, ConfigId)
Definition: adaptive.h:96
#define LegalClassId(c)
Definition: intproto.h:179
UNICHAR_ID * GetAmbiguities(TBLOB *Blob, CLASS_ID CorrectClass)
#define PRINT_FEATURE_MATCHES
Definition: intproto.h:193
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:39
void ShowBestMatchFor(int shape_id, const INT_FEATURE_STRUCT *features, int num_features)
int MakeTempProtoPerm(void *item1, void *item2)
void SetAdaptiveThreshold(FLOAT32 Threshold)
bool classify_bln_numeric_mode
Definition: classify.h:500
const FEATURE_DESC_STRUCT CharNormDesc
void ComputeIntFeatures(FEATURE_SET Features, INT_FEATURE_ARRAY IntFeatures)
Definition: float2int.cpp:100
#define reset_bit(array, bit)
Definition: bitvec.h:59
bool get_isdigit(UNICHAR_ID unichar_id) const
Definition: unicharset.h:470
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
FEATURE Features[1]
Definition: ocrfeatures.h:72
void ConvertMatchesToChoices(const DENORM &denorm, const TBOX &box, ADAPT_RESULTS *Results, BLOB_CHOICE_LIST *Choices)
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
void AdaptToChar(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, FLOAT32 Threshold, ADAPT_TEMPLATES adaptive_templates)
Definition: adaptmatch.cpp:886
double matcher_reliable_adaptive_result
Definition: classify.h:421
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:77
void XHeightRange(int unichar_id, const UNICHARSET &unicharset, const TBOX &bbox, float *min_xht, float *max_xht, float *yshift) const
Definition: normalis.cpp:428
UNICHARSET unicharset
Definition: ccutil.h:70
GenericVector< int > best_state
Definition: pageres.h:255
void InitAdaptiveClassifier(bool load_pre_trained_templates)
Definition: adaptmatch.cpp:527
char window_wait(ScrollView *win)
Definition: callcpp.cpp:111
#define zero_all_bits(array, length)
Definition: bitvec.h:33
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
static int SortDescendingRating(const void *t1, const void *t2)
Definition: shapetable.h:56
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826
uinT8 NumConfigs
Definition: intproto.h:110
bool classify_save_adapted_templates
Definition: classify.h:413
#define ADAPTABLE_WERD_ADJUSTMENT
Definition: adaptmatch.cpp:73
GenericVector< UnicharRating > match
Definition: adaptmatch.cpp:88
const STRING debug_string() const
Definition: ratngs.h:503
ADAPT_TEMPLATES BackupAdaptedTemplates
Definition: classify.h:477
#define WORST_POSSIBLE_RATING
Definition: adaptmatch.cpp:77
STRING DebugStr(int shape_id) const
Definition: shapetable.cpp:291
virtual int UnicharClassifySample(const TrainingSample &sample, Pix *page_pix, int debug, UNICHAR_ID keep_this, GenericVector< UnicharRating > *results)
int MaxNumUnichars() const
Definition: shapetable.cpp:465
bool empty() const
Definition: genericvector.h:84
void ComputeAdaptionThresholds(float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
Definition: pageres.cpp:553
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
UNICHAR_ID * Ambigs
Definition: adaptive.h:52
double certainty_scale
Definition: classify.h:437
const CHAR_FRAGMENT * get_fragment(UNICHAR_ID unichar_id) const
Definition: unicharset.h:682
#define ASSERT_HOST(x)
Definition: errcode.h:84
NORM_PROTOS * NormProtos
Definition: classify.h:486
FLOAT32 best_rating
Definition: adaptmatch.cpp:87
void ClassifyAsNoise(ADAPT_RESULTS *Results)
void ComputeIntCharNormArray(const FEATURE_STRUCT &norm_feature, uinT8 *char_norm_array)
Definition: float2int.cpp:69
void SwitchAdaptiveClassifier()
Definition: adaptmatch.cpp:628
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
IntegerMatcher im_
Definition: classify.h:503
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
bool MarginalMatch(float confidence, float matcher_great_threshold)
Definition: adaptmatch.cpp:122
#define GetPicoFeatureLength()
Definition: picofeat.h:59
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:505
void InitAdaptedClass(TBLOB *Blob, CLASS_ID ClassId, int FontinfoId, ADAPT_CLASS Class, ADAPT_TEMPLATES Templates)
Definition: adaptmatch.cpp:717
double matcher_avg_noise_size
Definition: classify.h:425
#define Y_DIM_OFFSET
Definition: adaptmatch.cpp:75
bool AdaptableWord(WERD_RES *word)
Definition: adaptmatch.cpp:850
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
int UNICHAR_ID
Definition: unichar.h:33
void AmbigClassifier(const GenericVector< INT_FEATURE_STRUCT > &int_features, const INT_FX_RESULT_STRUCT &fx_info, const TBLOB *blob, INT_TEMPLATES templates, ADAPT_CLASS *classes, UNICHAR_ID *ambiguities, ADAPT_RESULTS *results)
STRING ClassIDToDebugStr(const INT_TEMPLATES_STRUCT *templates, int class_id, int config_id) const