tesseract  3.05.02
rejctmap.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.h (Formerly rejmap.h)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  * Created: Thu Jun 9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18 
19 This module may look unnecessarily verbose, but here's the philosophy...
20 
21 ALL processing of the reject map is done in this module. There are lots of
22 separate calls to set reject/accept flags. These have DELIBERATELY been kept
23 distinct so that this module can decide what to do.
24 
25 Basically, there is a flag for each sort of rejection or acceptance. This
26 provides a history of what has happened to EACH character.
27 
28 Determining whether a character is CURRENTLY rejected depends on implicit
29 understanding of the SEQUENCE of possible calls. The flags are defined and
30 grouped in the REJ_FLAGS enum. These groupings are used in determining a
31 character's CURRENT rejection status. Basically, a character is ACCEPTED if
32 
33  none of the permanent rej flags are set
34  AND ( the character has never been rejected
35  OR an accept flag is set which is LATER than the latest reject flag )
36 
37 IT IS FUNDAMENTAL THAT ANYONE HACKING THIS CODE UNDERSTANDS THE SIGNIFICANCE
38 OF THIS IMPLIED TEMPORAL ORDERING OF THE FLAGS!!!!
39 **********************************************************************/
40 
41 #ifndef REJCTMAP_H
42 #define REJCTMAP_H
43 
44 #ifdef __UNIX__
45 #include <assert.h>
46 #endif
47 #include "memry.h"
48 #include "bits16.h"
49 #include "params.h"
50 
/* One enumerator per kind of rejection or acceptance event.
 *
 * The numeric value of each enumerator is used as a bit index by
 * REJ::set_flag() and REJ::flag(): values below 16 select a bit in flags1,
 * all other values select bit (value - 16) in flags2. Inserting, removing
 * or reordering enumerators therefore changes which bit each flag occupies.
 * There are 27 enumerators (values 0..26).
 *
 * The trailing PERM / TEMP / POTENTIAL tags are the original author's
 * classification of each reject mode. */
51 enum REJ_FLAGS {
52  /* Reject modes which are NEVER overridden */
53  R_TESS_FAILURE, // PERM Tess didn't classify
54  R_SMALL_XHT, // PERM Xht too small
55  R_EDGE_CHAR, // PERM Too close to edge of image
56  R_1IL_CONFLICT, // PERM 1Il confusion
57  R_POSTNN_1IL, // PERM 1Il unrejected by NN
58  R_REJ_CBLOB, // PERM Odd blob
59  R_MM_REJECT, // PERM Matrix match rejection (m's)
/* NOTE(review): tagged TEMP although it sits under the "NEVER overridden"
 * heading - confirm against perm_rejected() in rejctmap.cpp. */
60  R_BAD_REPETITION, // TEMP Repeated char which doesn't match trend
61 
62  /* Initial reject modes (pre NN_ACCEPT) */
63  R_POOR_MATCH, // TEMP Ray's original heuristic (Not used)
64  R_NOT_TESS_ACCEPTED, // TEMP Tess didn't accept WERD
65  R_CONTAINS_BLANKS, // TEMP Tess failed on other chs in WERD
66  R_BAD_PERMUTER, // POTENTIAL Bad permuter for WERD
67 
68  /* Reject modes generated after NN_ACCEPT but before MM_ACCEPT */
69  R_HYPHEN, // TEMP Post NN dodgy hyphen or full stop
70  R_DUBIOUS, // TEMP Post NN dodgy chars
71  R_NO_ALPHANUMS, // TEMP No alphanumerics in word after NN
72  R_MOSTLY_REJ, // TEMP Most of word rejected so rej the rest
73  R_XHT_FIXUP, // TEMP Xht tests unsure
74 
75  /* Reject modes generated after MM_ACCEPT but before QUALITY_ACCEPT */
76  R_BAD_QUALITY, // TEMP Quality metrics bad for WERD
77 
78  /* Reject modes generated after QUALITY_ACCEPT but before MINIMAL_REJ_ACCEPT */
79  R_DOC_REJ, // TEMP Document rejection
80  R_BLOCK_REJ, // TEMP Block rejection
81  R_ROW_REJ, // TEMP Row rejection
82  R_UNLV_REJ, // TEMP ~ turned to - or ^ turned to space
83 
84  /* Accept modes which occur between the above rejection groups */
85  R_NN_ACCEPT, // NN acceptance
86  R_HYPHEN_ACCEPT, // Hyphen acceptance
87  R_MM_ACCEPT, // Matrix match acceptance
88  R_QUALITY_ACCEPT, // Accept word in good quality doc
89  R_MINIMAL_REJ_ACCEPT // Accept EVERYTHING except tess failures
90 };
91 
92 /* REJECT MAP VALUES
 *
 * Single-character codes returned by REJ::display_char(), which tests in
 * this order:
 *   MAP_REJECT_PERM       when perm_rejected() is true
 *   MAP_REJECT_POTENTIAL  when accept_if_good_quality() is true
 *   MAP_REJECT_TEMP       when rejected() is true
 *   MAP_ACCEPT            otherwise
 */
93 
94 #define MAP_ACCEPT '1'
95 #define MAP_REJECT_PERM '0'
96 #define MAP_REJECT_TEMP '2'
97 #define MAP_REJECT_POTENTIAL '3'
98 
99 class REJ
100 {
101  BITS16 flags1;
102  BITS16 flags2;
103 
104  void set_flag(REJ_FLAGS rej_flag) {
105  if (rej_flag < 16)
106  flags1.turn_on_bit (rej_flag);
107  else
108  flags2.turn_on_bit (rej_flag - 16);
109  }
110 
111  BOOL8 rej_before_nn_accept();
112  BOOL8 rej_between_nn_and_mm();
113  BOOL8 rej_between_mm_and_quality_accept();
114  BOOL8 rej_between_quality_and_minimal_rej_accept();
115  BOOL8 rej_before_mm_accept();
116  BOOL8 rej_before_quality_accept();
117 
118  public:
119  REJ() { //constructor
120  }
121 
122  REJ( //classwise copy
123  const REJ &source) {
124  flags1 = source.flags1;
125  flags2 = source.flags2;
126  }
127 
128  REJ & operator= ( //assign REJ
129  const REJ & source) { //from this
130  flags1 = source.flags1;
131  flags2 = source.flags2;
132  return *this;
133  }
134 
135  BOOL8 flag(REJ_FLAGS rej_flag) {
136  if (rej_flag < 16)
137  return flags1.bit (rej_flag);
138  else
139  return flags2.bit (rej_flag - 16);
140  }
141 
142  char display_char() {
143  if (perm_rejected ())
144  return MAP_REJECT_PERM;
145  else if (accept_if_good_quality ())
146  return MAP_REJECT_POTENTIAL;
147  else if (rejected ())
148  return MAP_REJECT_TEMP;
149  else
150  return MAP_ACCEPT;
151  }
152 
153  BOOL8 perm_rejected(); //Is char perm reject?
154 
155  BOOL8 rejected(); //Is char rejected?
156 
157  BOOL8 accepted() { //Is char accepted?
158  return !rejected ();
159  }
160 
161  //potential rej?
163 
165  return (rejected () && !perm_rejected ());
166  }
167 
168  void setrej_tess_failure(); //Tess generated blank
169  void setrej_small_xht(); //Small xht char/wd
170  void setrej_edge_char(); //Close to image edge
171  void setrej_1Il_conflict(); //Initial reject map
172  void setrej_postNN_1Il(); //1Il after NN
173  void setrej_rej_cblob(); //Insert duff blob
174  void setrej_mm_reject(); //Matrix matcher
175  //Odd repeated char
176  void setrej_bad_repetition();
177  void setrej_poor_match(); //Failed Rays heuristic
178  //TEMP reject_word
180  //TEMP reject_word
181  void setrej_contains_blanks();
182  void setrej_bad_permuter(); //POTENTIAL reject_word
183  void setrej_hyphen(); //PostNN dubious hyph or .
184  void setrej_dubious(); //PostNN dubious limit
185  void setrej_no_alphanums(); //TEMP reject_word
186  void setrej_mostly_rej(); //TEMP reject_word
187  void setrej_xht_fixup(); //xht fixup
188  void setrej_bad_quality(); //TEMP reject_word
189  void setrej_doc_rej(); //TEMP reject_word
190  void setrej_block_rej(); //TEMP reject_word
191  void setrej_row_rej(); //TEMP reject_word
192  void setrej_unlv_rej(); //TEMP reject_word
193  void setrej_nn_accept(); //NN Flipped a char
194  void setrej_hyphen_accept(); //Good aspect ratio
195  void setrej_mm_accept(); //Matrix matcher
196  //Quality flip a char
197  void setrej_quality_accept();
198  //Accept all except blank
200 
201  void full_print(FILE *fp);
202 };
203 
204 class REJMAP
205 {
206  REJ *ptr; //ptr to the chars
207  inT16 len; //Number of chars
208 
209  public:
210  REJMAP() { //constructor
211  ptr = NULL;
212  len = 0;
213  }
214 
215  REJMAP( //classwise copy
216  const REJMAP &rejmap);
217 
218  REJMAP & operator= ( //assign REJMAP
219  const REJMAP & source); //from this
220 
221  ~REJMAP () { //destructor
222  if (ptr != NULL)
223  free_struct (ptr, len * sizeof (REJ), "REJ");
224  }
225 
226  void initialise( //Redefine map
227  inT16 length);
228 
229  REJ & operator[]( //access function
230  inT16 index) const //map index
231  {
232  ASSERT_HOST (index < len);
233  return ptr[index]; //no bounds checks
234  }
235 
236  inT32 length() const { //map length
237  return len;
238  }
239 
240  inT16 accept_count(); //How many accepted?
241 
242  inT16 reject_count() { //How many rejects?
243  return len - accept_count ();
244  }
245 
246  void remove_pos( //Cut out an element
247  inT16 pos); //element to remove
248 
249  void print(FILE *fp);
250 
251  void full_print(FILE *fp);
252 
253  BOOL8 recoverable_rejects(); //Any non perm rejs?
254 
256  //Any potential rejs?
257 
258  void rej_word_small_xht(); //Reject whole word
259  //Reject whole word
260  void rej_word_tess_failure();
262  //Reject whole word
263  //Reject whole word
265  //Reject whole word
266  void rej_word_bad_permuter();
267  void rej_word_xht_fixup(); //Reject whole word
268  //Reject whole word
269  void rej_word_no_alphanums();
270  void rej_word_mostly_rej(); //Reject whole word
271  void rej_word_bad_quality(); //Reject whole word
272  void rej_word_doc_rej(); //Reject whole word
273  void rej_word_block_rej(); //Reject whole word
274  void rej_word_row_rej(); //Reject whole word
275 };
276 #endif
void setrej_hyphen_accept()
Definition: rejctmap.cpp:208
BOOL8 quality_recoverable_rejects()
Definition: rejctmap.cpp:354
void rej_word_bad_quality()
Definition: rejctmap.cpp:488
BOOL8 recoverable_rejects()
Definition: rejctmap.cpp:343
void print(FILE *fp)
Definition: rejctmap.cpp:394
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:228
void rej_word_doc_rej()
Definition: rejctmap.cpp:497
void free_struct(void *deadstruct, inT32, const char *)
Definition: memry.cpp:43
short inT16
Definition: host.h:33
BOOL8 bit(uinT8 bit_num) const
Definition: bits16.h:56
void setrej_xht_fixup()
Definition: rejctmap.cpp:178
void setrej_no_alphanums()
Definition: rejctmap.cpp:168
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:141
void full_print(FILE *fp)
Definition: rejctmap.cpp:406
~REJMAP()
Definition: rejctmap.h:221
void initialise(inT16 length)
Definition: rejctmap.cpp:318
void setrej_block_rej()
Definition: rejctmap.cpp:193
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:434
inT32 length() const
Definition: rejctmap.h:236
void setrej_poor_match()
Definition: rejctmap.cpp:136
REJ(const REJ &source)
Definition: rejctmap.h:122
REJ & operator=(const REJ &source)
Definition: rejctmap.h:128
void setrej_small_xht()
Definition: rejctmap.cpp:101
BOOL8 accept_if_good_quality()
Definition: rejctmap.cpp:83
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:365
void setrej_row_rej()
Definition: rejctmap.cpp:198
void rej_word_tess_failure()
Definition: rejctmap.cpp:425
REJMAP()
Definition: rejctmap.h:210
inT16 reject_count()
Definition: rejctmap.h:242
REJ & operator[](inT16 index) const
Definition: rejctmap.h:229
void rej_word_xht_fixup()
Definition: rejctmap.cpp:461
unsigned char BOOL8
Definition: host.h:46
BOOL8 rejected()
Definition: rejctmap.cpp:73
void setrej_tess_failure()
Definition: rejctmap.cpp:96
void setrej_1Il_conflict()
Definition: rejctmap.cpp:111
void setrej_hyphen()
Definition: rejctmap.cpp:158
void rej_word_block_rej()
Definition: rejctmap.cpp:506
REJ_FLAGS
Definition: rejctmap.h:51
BOOL8 perm_rejected()
Definition: rejctmap.cpp:24
void full_print(FILE *fp)
Definition: rejctmap.cpp:234
inT16 accept_count()
Definition: rejctmap.cpp:331
void setrej_nn_accept()
Definition: rejctmap.cpp:213
BOOL8 accepted()
Definition: rejctmap.h:157
void rej_word_mostly_rej()
Definition: rejctmap.cpp:479
void setrej_rej_cblob()
Definition: rejctmap.cpp:121
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:297
void setrej_bad_repetition()
Definition: rejctmap.cpp:131
void setrej_contains_blanks()
Definition: rejctmap.cpp:147
BOOL8 flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:135
void rej_word_small_xht()
Definition: rejctmap.cpp:416
void setrej_bad_quality()
Definition: rejctmap.cpp:183
BOOL8 recoverable()
Definition: rejctmap.h:164
void rej_word_row_rej()
Definition: rejctmap.cpp:515
int inT32
Definition: host.h:35
void rej_word_bad_permuter()
Definition: rejctmap.cpp:452
void setrej_mostly_rej()
Definition: rejctmap.cpp:173
void setrej_postNN_1Il()
Definition: rejctmap.cpp:116
void setrej_mm_accept()
Definition: rejctmap.cpp:218
REJ
Definition: rejctmap.h:99
char display_char()
Definition: rejctmap.h:142
void setrej_unlv_rej()
Definition: rejctmap.cpp:203
#define MAP_REJECT_TEMP
Definition: rejctmap.h:96
void setrej_mm_reject()
Definition: rejctmap.cpp:126
void rej_word_contains_blanks()
Definition: rejctmap.cpp:443
#define MAP_ACCEPT
Definition: rejctmap.h:94
void turn_on_bit(uinT8 bit_num)
Definition: bits16.h:37
void rej_word_no_alphanums()
Definition: rejctmap.cpp:470
void setrej_doc_rej()
Definition: rejctmap.cpp:188
void setrej_edge_char()
Definition: rejctmap.cpp:106
#define MAP_REJECT_PERM
Definition: rejctmap.h:95
Definition: bits16.h:25
void setrej_bad_permuter()
Definition: rejctmap.cpp:153
#define MAP_REJECT_POTENTIAL
Definition: rejctmap.h:97
#define ASSERT_HOST(x)
Definition: errcode.h:84
void setrej_dubious()
Definition: rejctmap.cpp:163
REJ()
Definition: rejctmap.h:119
void setrej_quality_accept()
Definition: rejctmap.cpp:223