tesseract  3.05.02
rejctmap.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: rejctmap.cpp (Formerly rejmap.c)
3  * Description: REJ and REJMAP class functions.
4  * Author: Phil Cheatle
5  * Created: Thu Jun 9 13:46:38 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "host.h"
21 #include "rejctmap.h"
22 #include "params.h"
23 
24 BOOL8 REJ::perm_rejected() { //Is char perm reject?
25  return (flag (R_TESS_FAILURE) ||
26  flag (R_SMALL_XHT) ||
27  flag (R_EDGE_CHAR) ||
28  flag (R_1IL_CONFLICT) ||
29  flag (R_POSTNN_1IL) ||
30  flag (R_REJ_CBLOB) ||
32 }
33 
34 
35 BOOL8 REJ::rej_before_nn_accept() {
36  return flag (R_POOR_MATCH) ||
39 }
40 
41 
42 BOOL8 REJ::rej_between_nn_and_mm() {
43  return flag (R_HYPHEN) ||
44  flag (R_DUBIOUS) ||
46 }
47 
48 
49 BOOL8 REJ::rej_between_mm_and_quality_accept() {
50  return flag (R_BAD_QUALITY);
51 }
52 
53 
54 BOOL8 REJ::rej_between_quality_and_minimal_rej_accept() {
55  return flag (R_DOC_REJ) ||
57 }
58 
59 
60 BOOL8 REJ::rej_before_mm_accept() {
61  return rej_between_nn_and_mm () ||
62  (rej_before_nn_accept () &&
64 }
65 
66 
67 BOOL8 REJ::rej_before_quality_accept() {
68  return rej_between_mm_and_quality_accept () ||
69  (!flag (R_MM_ACCEPT) && rej_before_mm_accept ());
70 }
71 
72 
73 BOOL8 REJ::rejected() { //Is char rejected?
75  return FALSE;
76  else
77  return (perm_rejected () ||
78  rej_between_quality_and_minimal_rej_accept () ||
79  (!flag (R_QUALITY_ACCEPT) && rej_before_quality_accept ()));
80 }
81 
82 
83 BOOL8 REJ::accept_if_good_quality() { //potential rej?
84  return (rejected () &&
85  !perm_rejected () &&
86  flag (R_BAD_PERMUTER) &&
87  !flag (R_POOR_MATCH) &&
90  (!rej_between_nn_and_mm () &&
91  !rej_between_mm_and_quality_accept () &&
92  !rej_between_quality_and_minimal_rej_accept ()));
93 }
94 
95 
96 void REJ::setrej_tess_failure() { //Tess generated blank
97  set_flag(R_TESS_FAILURE);
98 }
99 
100 
101 void REJ::setrej_small_xht() { //Small xht char/wd
102  set_flag(R_SMALL_XHT);
103 }
104 
105 
106 void REJ::setrej_edge_char() { //Close to image edge
107  set_flag(R_EDGE_CHAR);
108 }
109 
110 
111 void REJ::setrej_1Il_conflict() { //Initial reject map
112  set_flag(R_1IL_CONFLICT);
113 }
114 
115 
116 void REJ::setrej_postNN_1Il() { //1Il after NN
117  set_flag(R_POSTNN_1IL);
118 }
119 
120 
121 void REJ::setrej_rej_cblob() { //Insert duff blob
122  set_flag(R_REJ_CBLOB);
123 }
124 
125 
126 void REJ::setrej_mm_reject() { //Matrix matcher
127  set_flag(R_MM_REJECT);
128 }
129 
130 
131 void REJ::setrej_bad_repetition() { //Odd repeated char
132  set_flag(R_BAD_REPETITION);
133 }
134 
135 
136 void REJ::setrej_poor_match() { //Failed Rays heuristic
137  set_flag(R_POOR_MATCH);
138 }
139 
140 
142  //TEMP reject_word
143  set_flag(R_NOT_TESS_ACCEPTED);
144 }
145 
146 
148  //TEMP reject_word
149  set_flag(R_CONTAINS_BLANKS);
150 }
151 
152 
153 void REJ::setrej_bad_permuter() { //POTENTIAL reject_word
154  set_flag(R_BAD_PERMUTER);
155 }
156 
157 
158 void REJ::setrej_hyphen() { //PostNN dubious hyphen or .
159  set_flag(R_HYPHEN);
160 }
161 
162 
163 void REJ::setrej_dubious() { //PostNN dubious limit
164  set_flag(R_DUBIOUS);
165 }
166 
167 
168 void REJ::setrej_no_alphanums() { //TEMP reject_word
169  set_flag(R_NO_ALPHANUMS);
170 }
171 
172 
173 void REJ::setrej_mostly_rej() { //TEMP reject_word
174  set_flag(R_MOSTLY_REJ);
175 }
176 
177 
178 void REJ::setrej_xht_fixup() { //xht fixup
179  set_flag(R_XHT_FIXUP);
180 }
181 
182 
183 void REJ::setrej_bad_quality() { //TEMP reject_word
184  set_flag(R_BAD_QUALITY);
185 }
186 
187 
188 void REJ::setrej_doc_rej() { //TEMP reject_word
189  set_flag(R_DOC_REJ);
190 }
191 
192 
193 void REJ::setrej_block_rej() { //TEMP reject_word
194  set_flag(R_BLOCK_REJ);
195 }
196 
197 
198 void REJ::setrej_row_rej() { //TEMP reject_word
199  set_flag(R_ROW_REJ);
200 }
201 
202 
203 void REJ::setrej_unlv_rej() { //TEMP reject_word
204  set_flag(R_UNLV_REJ);
205 }
206 
207 
208 void REJ::setrej_hyphen_accept() { //NN Flipped a char
209  set_flag(R_HYPHEN_ACCEPT);
210 }
211 
212 
213 void REJ::setrej_nn_accept() { //NN Flipped a char
214  set_flag(R_NN_ACCEPT);
215 }
216 
217 
218 void REJ::setrej_mm_accept() { //Matrix matcher
219  set_flag(R_MM_ACCEPT);
220 }
221 
222 
223 void REJ::setrej_quality_accept() { //Quality flip a char
224  set_flag(R_QUALITY_ACCEPT);
225 }
226 
227 
229  //Accept all except blank
230  set_flag(R_MINIMAL_REJ_ACCEPT);
231 }
232 
233 
234 void REJ::full_print(FILE *fp) {
235  fprintf (fp, "R_TESS_FAILURE: %s\n", flag (R_TESS_FAILURE) ? "T" : "F");
236  fprintf (fp, "R_SMALL_XHT: %s\n", flag (R_SMALL_XHT) ? "T" : "F");
237  fprintf (fp, "R_EDGE_CHAR: %s\n", flag (R_EDGE_CHAR) ? "T" : "F");
238  fprintf (fp, "R_1IL_CONFLICT: %s\n", flag (R_1IL_CONFLICT) ? "T" : "F");
239  fprintf (fp, "R_POSTNN_1IL: %s\n", flag (R_POSTNN_1IL) ? "T" : "F");
240  fprintf (fp, "R_REJ_CBLOB: %s\n", flag (R_REJ_CBLOB) ? "T" : "F");
241  fprintf (fp, "R_MM_REJECT: %s\n", flag (R_MM_REJECT) ? "T" : "F");
242  fprintf (fp, "R_BAD_REPETITION: %s\n", flag (R_BAD_REPETITION) ? "T" : "F");
243  fprintf (fp, "R_POOR_MATCH: %s\n", flag (R_POOR_MATCH) ? "T" : "F");
244  fprintf (fp, "R_NOT_TESS_ACCEPTED: %s\n",
245  flag (R_NOT_TESS_ACCEPTED) ? "T" : "F");
246  fprintf (fp, "R_CONTAINS_BLANKS: %s\n",
247  flag (R_CONTAINS_BLANKS) ? "T" : "F");
248  fprintf (fp, "R_BAD_PERMUTER: %s\n", flag (R_BAD_PERMUTER) ? "T" : "F");
249  fprintf (fp, "R_HYPHEN: %s\n", flag (R_HYPHEN) ? "T" : "F");
250  fprintf (fp, "R_DUBIOUS: %s\n", flag (R_DUBIOUS) ? "T" : "F");
251  fprintf (fp, "R_NO_ALPHANUMS: %s\n", flag (R_NO_ALPHANUMS) ? "T" : "F");
252  fprintf (fp, "R_MOSTLY_REJ: %s\n", flag (R_MOSTLY_REJ) ? "T" : "F");
253  fprintf (fp, "R_XHT_FIXUP: %s\n", flag (R_XHT_FIXUP) ? "T" : "F");
254  fprintf (fp, "R_BAD_QUALITY: %s\n", flag (R_BAD_QUALITY) ? "T" : "F");
255  fprintf (fp, "R_DOC_REJ: %s\n", flag (R_DOC_REJ) ? "T" : "F");
256  fprintf (fp, "R_BLOCK_REJ: %s\n", flag (R_BLOCK_REJ) ? "T" : "F");
257  fprintf (fp, "R_ROW_REJ: %s\n", flag (R_ROW_REJ) ? "T" : "F");
258  fprintf (fp, "R_UNLV_REJ: %s\n", flag (R_UNLV_REJ) ? "T" : "F");
259  fprintf (fp, "R_HYPHEN_ACCEPT: %s\n", flag (R_HYPHEN_ACCEPT) ? "T" : "F");
260  fprintf (fp, "R_NN_ACCEPT: %s\n", flag (R_NN_ACCEPT) ? "T" : "F");
261  fprintf (fp, "R_MM_ACCEPT: %s\n", flag (R_MM_ACCEPT) ? "T" : "F");
262  fprintf (fp, "R_QUALITY_ACCEPT: %s\n", flag (R_QUALITY_ACCEPT) ? "T" : "F");
263  fprintf (fp, "R_MINIMAL_REJ_ACCEPT: %s\n",
264  flag (R_MINIMAL_REJ_ACCEPT) ? "T" : "F");
265 }
266 
267 
// The REJMAP class has been hacked to use alloc_struct instead of new [].
// This is done solely to reduce memory fragmentation, and it is rather kludgy:
// alloc_struct by-passes the call to the constructor of REJ on each
// array element. Although the REJ constructor is empty, the BITS16 members
// do have a constructor which sets all the flags to 0. The memset
// replaces this functionality.
274 
275 REJMAP::REJMAP( //classwise copy
276  const REJMAP &source) {
277  REJ *to;
278  REJ *from = source.ptr;
279  int i;
280 
281  len = source.length ();
282 
283  if (len > 0) {
284  ptr = (REJ *) alloc_struct (len * sizeof (REJ), "REJ");
285  to = ptr;
286  for (i = 0; i < len; i++) {
287  *to = *from;
288  to++;
289  from++;
290  }
291  }
292  else
293  ptr = NULL;
294 }
295 
296 
297 REJMAP & REJMAP::operator= ( //assign REJMAP
298 const REJMAP & source //from this
299 ) {
300  REJ *
301  to;
302  REJ *
303  from = source.ptr;
304  int
305  i;
306 
307  initialise (source.len);
308  to = ptr;
309  for (i = 0; i < len; i++) {
310  *to = *from;
311  to++;
312  from++;
313  }
314  return *this;
315 }
316 
317 
318 void REJMAP::initialise( //Redefine map
319  inT16 length) {
320  if (ptr != NULL)
321  free_struct (ptr, len * sizeof (REJ), "REJ");
322  len = length;
323  if (len > 0)
324  ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
325  0, len * sizeof (REJ));
326  else
327  ptr = NULL;
328 }
329 
330 
331 inT16 REJMAP::accept_count() { //How many accepted?
332  int i;
333  inT16 count = 0;
334 
335  for (i = 0; i < len; i++) {
336  if (ptr[i].accepted ())
337  count++;
338  }
339  return count;
340 }
341 
342 
343 BOOL8 REJMAP::recoverable_rejects() { //Any non perm rejs?
344  int i;
345 
346  for (i = 0; i < len; i++) {
347  if (ptr[i].recoverable ())
348  return TRUE;
349  }
350  return FALSE;
351 }
352 
353 
355  int i;
356 
357  for (i = 0; i < len; i++) {
358  if (ptr[i].accept_if_good_quality ())
359  return TRUE;
360  }
361  return FALSE;
362 }
363 
364 
365 void REJMAP::remove_pos( //Cut out an element
366  inT16 pos //element to remove
367  ) {
368  REJ *new_ptr; //new, smaller map
369  int i;
370 
371  ASSERT_HOST (pos >= 0);
372  ASSERT_HOST (pos < len);
373  ASSERT_HOST (len > 0);
374 
375  len--;
376  if (len > 0)
377  new_ptr = (REJ *) memset (alloc_struct (len * sizeof (REJ), "REJ"),
378  0, len * sizeof (REJ));
379  else
380  new_ptr = NULL;
381 
382  for (i = 0; i < pos; i++)
383  new_ptr[i] = ptr[i]; //copy pre pos
384 
385  for (; pos < len; pos++)
386  new_ptr[pos] = ptr[pos + 1]; //copy post pos
387 
388  //delete old map
389  free_struct (ptr, (len + 1) * sizeof (REJ), "REJ");
390  ptr = new_ptr;
391 }
392 
393 
394 void REJMAP::print(FILE *fp) {
395  int i;
396  char buff[512];
397 
398  for (i = 0; i < len; i++) {
399  buff[i] = ptr[i].display_char ();
400  }
401  buff[i] = '\0';
402  fprintf (fp, "\"%s\"", buff);
403 }
404 
405 
406 void REJMAP::full_print(FILE *fp) {
407  int i;
408 
409  for (i = 0; i < len; i++) {
410  ptr[i].full_print (fp);
411  fprintf (fp, "\n");
412  }
413 }
414 
415 
416 void REJMAP::rej_word_small_xht() { //Reject whole word
417  int i;
418 
419  for (i = 0; i < len; i++) {
420  ptr[i].setrej_small_xht ();
421  }
422 }
423 
424 
425 void REJMAP::rej_word_tess_failure() { //Reject whole word
426  int i;
427 
428  for (i = 0; i < len; i++) {
429  ptr[i].setrej_tess_failure ();
430  }
431 }
432 
433 
434 void REJMAP::rej_word_not_tess_accepted() { //Reject whole word
435  int i;
436 
437  for (i = 0; i < len; i++) {
438  if (ptr[i].accepted()) ptr[i].setrej_not_tess_accepted();
439  }
440 }
441 
442 
443 void REJMAP::rej_word_contains_blanks() { //Reject whole word
444  int i;
445 
446  for (i = 0; i < len; i++) {
447  if (ptr[i].accepted()) ptr[i].setrej_contains_blanks();
448  }
449 }
450 
451 
452 void REJMAP::rej_word_bad_permuter() { //Reject whole word
453  int i;
454 
455  for (i = 0; i < len; i++) {
456  if (ptr[i].accepted()) ptr[i].setrej_bad_permuter ();
457  }
458 }
459 
460 
461 void REJMAP::rej_word_xht_fixup() { //Reject whole word
462  int i;
463 
464  for (i = 0; i < len; i++) {
465  if (ptr[i].accepted()) ptr[i].setrej_xht_fixup();
466  }
467 }
468 
469 
470 void REJMAP::rej_word_no_alphanums() { //Reject whole word
471  int i;
472 
473  for (i = 0; i < len; i++) {
474  if (ptr[i].accepted()) ptr[i].setrej_no_alphanums();
475  }
476 }
477 
478 
479 void REJMAP::rej_word_mostly_rej() { //Reject whole word
480  int i;
481 
482  for (i = 0; i < len; i++) {
483  if (ptr[i].accepted()) ptr[i].setrej_mostly_rej();
484  }
485 }
486 
487 
488 void REJMAP::rej_word_bad_quality() { //Reject whole word
489  int i;
490 
491  for (i = 0; i < len; i++) {
492  if (ptr[i].accepted()) ptr[i].setrej_bad_quality();
493  }
494 }
495 
496 
497 void REJMAP::rej_word_doc_rej() { //Reject whole word
498  int i;
499 
500  for (i = 0; i < len; i++) {
501  if (ptr[i].accepted()) ptr[i].setrej_doc_rej();
502  }
503 }
504 
505 
506 void REJMAP::rej_word_block_rej() { //Reject whole word
507  int i;
508 
509  for (i = 0; i < len; i++) {
510  if (ptr[i].accepted()) ptr[i].setrej_block_rej();
511  }
512 }
513 
514 
515 void REJMAP::rej_word_row_rej() { //Reject whole word
516  int i;
517 
518  for (i = 0; i < len; i++) {
519  if (ptr[i].accepted()) ptr[i].setrej_row_rej();
520  }
521 }
void setrej_hyphen_accept()
Definition: rejctmap.cpp:208
int count(LIST var_list)
Definition: oldlist.cpp:103
BOOL8 quality_recoverable_rejects()
Definition: rejctmap.cpp:354
void rej_word_bad_quality()
Definition: rejctmap.cpp:488
BOOL8 recoverable_rejects()
Definition: rejctmap.cpp:343
void print(FILE *fp)
Definition: rejctmap.cpp:394
void setrej_minimal_rej_accept()
Definition: rejctmap.cpp:228
#define TRUE
Definition: capi.h:45
void rej_word_doc_rej()
Definition: rejctmap.cpp:497
void free_struct(void *deadstruct, inT32, const char *)
Definition: memry.cpp:43
short inT16
Definition: host.h:33
void setrej_xht_fixup()
Definition: rejctmap.cpp:178
void setrej_no_alphanums()
Definition: rejctmap.cpp:168
void setrej_not_tess_accepted()
Definition: rejctmap.cpp:141
void full_print(FILE *fp)
Definition: rejctmap.cpp:406
void initialise(inT16 length)
Definition: rejctmap.cpp:318
void setrej_block_rej()
Definition: rejctmap.cpp:193
void rej_word_not_tess_accepted()
Definition: rejctmap.cpp:434
inT32 length() const
Definition: rejctmap.h:236
void setrej_poor_match()
Definition: rejctmap.cpp:136
void setrej_small_xht()
Definition: rejctmap.cpp:101
BOOL8 accept_if_good_quality()
Definition: rejctmap.cpp:83
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:365
void setrej_row_rej()
Definition: rejctmap.cpp:198
void rej_word_tess_failure()
Definition: rejctmap.cpp:425
REJMAP()
Definition: rejctmap.h:210
void rej_word_xht_fixup()
Definition: rejctmap.cpp:461
unsigned char BOOL8
Definition: host.h:46
BOOL8 rejected()
Definition: rejctmap.cpp:73
void setrej_tess_failure()
Definition: rejctmap.cpp:96
void setrej_1Il_conflict()
Definition: rejctmap.cpp:111
void setrej_hyphen()
Definition: rejctmap.cpp:158
void rej_word_block_rej()
Definition: rejctmap.cpp:506
BOOL8 perm_rejected()
Definition: rejctmap.cpp:24
void full_print(FILE *fp)
Definition: rejctmap.cpp:234
inT16 accept_count()
Definition: rejctmap.cpp:331
void setrej_nn_accept()
Definition: rejctmap.cpp:213
void rej_word_mostly_rej()
Definition: rejctmap.cpp:479
void setrej_rej_cblob()
Definition: rejctmap.cpp:121
#define FALSE
Definition: capi.h:46
REJMAP & operator=(const REJMAP &source)
Definition: rejctmap.cpp:297
void setrej_bad_repetition()
Definition: rejctmap.cpp:131
void setrej_contains_blanks()
Definition: rejctmap.cpp:147
BOOL8 flag(REJ_FLAGS rej_flag)
Definition: rejctmap.h:135
void rej_word_small_xht()
Definition: rejctmap.cpp:416
void setrej_bad_quality()
Definition: rejctmap.cpp:183
void rej_word_row_rej()
Definition: rejctmap.cpp:515
void rej_word_bad_permuter()
Definition: rejctmap.cpp:452
void setrej_mostly_rej()
Definition: rejctmap.cpp:173
void setrej_postNN_1Il()
Definition: rejctmap.cpp:116
void setrej_mm_accept()
Definition: rejctmap.cpp:218
Definition: rejctmap.h:99
char display_char()
Definition: rejctmap.h:142
void setrej_unlv_rej()
Definition: rejctmap.cpp:203
void setrej_mm_reject()
Definition: rejctmap.cpp:126
void rej_word_contains_blanks()
Definition: rejctmap.cpp:443
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:39
void rej_word_no_alphanums()
Definition: rejctmap.cpp:470
void setrej_doc_rej()
Definition: rejctmap.cpp:188
void setrej_edge_char()
Definition: rejctmap.cpp:106
void setrej_bad_permuter()
Definition: rejctmap.cpp:153
#define ASSERT_HOST(x)
Definition: errcode.h:84
void setrej_dubious()
Definition: rejctmap.cpp:163
void setrej_quality_accept()
Definition: rejctmap.cpp:223