tesseract  3.05.02
lm_consistency.h
Go to the documentation of this file.
1 // File: lm_consistency.h
3 // Description: Struct for recording consistency of the paths representing
4 // OCR hypotheses.
5 // Author: Rika Antonova
6 // Created: Mon Jun 20 11:26:43 PST 2012
7 //
8 // (C) Copyright 2012, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "dawg.h"
22 #include "dict.h"
23 #include "host.h"
24 #include "ratngs.h"
25 
26 #ifndef TESSERACT_WORDREC_CONSISTENCY_H_
27 #define TESSERACT_WORDREC_CONSISTENCY_H_
28 
29 namespace tesseract {
30 
// Printable names for the x-height consistency states.
// NOTE(review): presumably indexed by an XHeightConsistencyEnum value, in
// which case the order here must match the enum declaration -- confirm
// against the enum's definition.
31 static const char * const XHeightConsistencyEnumName[] = {
32  "XH_GOOD",
33  "XH_SUBNORMAL",
34  "XH_INCONSISTENT",
35 };
36 
37 // Struct for keeping track of the consistency of the path.
40 
41  // How far characters must be shifted away from normal parameters
42  // before they are no longer considered normal.
43  static const int kShiftThresh = 1;
44 
45  // How much switching from subscript to superscript and back is tolerated
46  // before the path is declared inconsistent.
47  static const int kMaxEntropy = 1;
48 
49  // Script positions - order important for entropy calculation.
  // These values index the per-position arrays (e.g. xht_lo[kNORM]).
50  static const int kSUB = 0, kNORM = 1, kSUP = 2;
  // Number of script positions; sizes the per-position arrays and bounds the
  // initialization loop in the constructor.
51  static const int kNumPos = 3;
52 
// Builds a consistency record, either from scratch or as a copy of a parent.
//   parent_info == NULL: counters are zeroed, flags are cleared and the
//     per-position x-height bounds are reset to [0, 256].
//   parent_info != NULL: every field is copied from *parent_info.
// NOTE(review): the embedded source numbering in this listing skips lines
// 63, 67 and 78, so some initializations appear to be missing here. In
// particular, no visible line sets xht_decision, which InconsistentXHeight()
// reads. Restore the missing lines from the original header -- TODO confirm.
53  explicit LMConsistencyInfo(const LMConsistencyInfo* parent_info) {
54  if (parent_info == NULL) {
55  // Initialize from scratch.
56  num_alphas = 0;
57  num_digits = 0;
58  num_punc = 0;
59  num_other = 0;
60  chartype = CT_NONE;
61  punc_ref = NO_EDGE;
62  invalid_punc = false;
64  num_lower = 0;
65  script_id = 0;
66  inconsistent_script = false;
68  inconsistent_font = false;
69  // Initialize XHeight stats: empty counts and the widest bounds
  // for every script position.
70  for (int i = 0; i < kNumPos; i++) {
71  xht_count[i] = 0;
72  xht_count_punc[i] = 0;
73  xht_lo[i] = 0;
74  xht_hi[i] = 256; // kBlnCellHeight
75  }
76  xht_sp = -1; // This invalid value indicates that there was no parent.
77  xpos_entropy = 0;
79  } else {
80  // Copy parent info (member-wise assignment of the whole struct).
81  *this = *parent_info;
82  }
83  }
// Returns num_punc when invalid_punc is set, otherwise 0: once the
// punctuation is flagged invalid, all of it counts as inconsistent.
84  inline int NumInconsistentPunc() const {
85  return invalid_punc ? num_punc : 0;
86  }
// Returns the number of characters with inconsistent case.
// NOTE(review): the body of this function is missing from this listing (the
// embedded source numbering skips line 88); restore the return statement
// from the original header before compiling.
87  inline int NumInconsistentCase() const {
89  }
// Returns the number of characters with an inconsistent character type.
// The visible part of the sum is NumInconsistentPunc() + num_other.
// NOTE(review): the rest of the return expression is missing from this
// listing (the embedded source numbering skips line 92); restore it from the
// original header before compiling.
90  inline int NumInconsistentChartype() const {
91  return (NumInconsistentPunc() + num_other +
93  }
// Returns true only when no inconsistency is recorded. The visible part of
// the condition requires both NumInconsistentPunc() and
// NumInconsistentCase() to be zero.
// NOTE(review): the remaining conjuncts are missing from this listing (the
// embedded source numbering skips lines 96-97); restore them from the
// original header before compiling.
94  inline bool Consistent() const {
95  return (NumInconsistentPunc() == 0 && NumInconsistentCase() == 0 &&
98  }
// Returns the number of inconsistent spaces.
// NOTE(review): the body of this function is missing from this listing (the
// embedded source numbering skips line 100); restore the return statement
// from the original header before compiling.
99  inline int NumInconsistentSpaces() const {
101  }
// Returns 1 when xht_decision is XH_INCONSISTENT and 0 otherwise. The return
// type is int although the value is the result of a boolean comparison.
102  inline int InconsistentXHeight() const {
103  return xht_decision == XH_INCONSISTENT;
104  }
// Declaration only; the definition is not part of this header.
//   b       - the blob choice to take into account.
//   is_punc - whether that choice is punctuation.
// NOTE(review): presumably updates the xht_* statistics and xht_decision
// from b -- confirm against the implementation.
105  void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc);
// Lower bound of the x-height for normal-position characters.
// Returns 0.0f when the x-height is inconsistent, so that together with
// BodyMaxXHeight() the widest range is reported; otherwise xht_lo[kNORM].
106  float BodyMinXHeight() const {
107  if (InconsistentXHeight())
108  return 0.0f;
109  return xht_lo[kNORM];
110  }
// Upper bound of the x-height for normal-position characters.
// Returns MAX_INT16 converted to float when the x-height is inconsistent, so
// that together with BodyMinXHeight() the widest range is reported;
// otherwise xht_hi[kNORM].
111  float BodyMaxXHeight() const {
112  if (InconsistentXHeight())
113  return static_cast<float>(MAX_INT16);
114  return xht_hi[kNORM];
115  }
116 
// NOTE(review): most data members used by the methods in this struct (for
// example num_alphas, invalid_punc, xht_count, xht_decision) are missing
// from this listing; the embedded source numbering skips lines 117-118,
// 120-129 and 133-137. Restore them from the original header.
// Punctuation count; zeroed by the constructor when there is no parent and
// returned by NumInconsistentPunc() when invalid_punc is set.
119  int num_punc;
130  // Metrics clumped by position.
  // Lower and upper x-height bounds, one entry per script position
  // (kSUB, kNORM, kSUP); the constructor resets them to 0 and 256.
131  float xht_lo[kNumPos];
132  float xht_hi[kNumPos];
138 };
139 
140 
141 } // namespace tesseract
142 
143 #endif // TESSERACT_WORDREC_CONSISTENCY_H_
static const int kMaxEntropy
inT64 EDGE_REF
Definition: dawg.h:54
short inT16
Definition: host.h:33
LMConsistencyInfo(const LMConsistencyInfo *parent_info)
XHeightConsistencyEnum
Definition: dict.h:74
static const int kShiftThresh
void ComputeXheightConsistency(const BLOB_CHOICE *b, bool is_punc)
#define MAX_INT16
Definition: host.h:52
XHeightConsistencyEnum xht_decision