tesseract  3.05.02
reject.h File Reference
#include "params.h"
#include "pageres.h"

Go to the source code of this file.

Functions

void reject_blanks (WERD_RES *word)
 
void reject_poor_matches (WERD_RES *word)
 
float compute_reject_threshold (WERD_CHOICE *word)
 
BOOL8 word_contains_non_1_digit (const char *word, const char *word_lengths)
 
void dont_allow_1Il (WERD_RES *word)
 
void flip_hyphens (WERD_RES *word)
 
void flip_0O (WERD_RES *word)
 
BOOL8 non_0_digit (const char *str, int length)
 

Function Documentation

◆ compute_reject_threshold()

float compute_reject_threshold ( WERD_CHOICE * word)

Definition at line 226 of file reject.cpp.

226  {
227  float threshold; // rejection threshold
228  float bestgap = 0.0f; // biggest gap
229  float gapstart; // bottom of gap
230  // super iterator
231  BLOB_CHOICE_IT choice_it; // real iterator
232 
233  int blob_count = word->length();
234  GenericVector<float> ratings;
235  ratings.resize_no_init(blob_count);
236  for (int i = 0; i < blob_count; ++i) {
237  ratings[i] = word->certainty(i);
238  }
239  ratings.sort();
240  gapstart = ratings[0] - 1; // all reject if none better
241  if (blob_count >= 3) {
242  for (int index = 0; index < blob_count - 1; index++) {
243  if (ratings[index + 1] - ratings[index] > bestgap) {
244  bestgap = ratings[index + 1] - ratings[index];
245  // find biggest
246  gapstart = ratings[index];
247  }
248  }
249  }
250  threshold = gapstart + bestgap / 2;
251 
252  return threshold;
253 }
void resize_no_init(int size)
Definition: genericvector.h:66
float certainty() const
Definition: ratngs.h:328
int length() const
Definition: ratngs.h:301

◆ dont_allow_1Il()

void dont_allow_1Il ( WERD_RES * word)

◆ flip_0O()

void flip_0O ( WERD_RES * word)

◆ flip_hyphens()

void flip_hyphens ( WERD_RES * word)

◆ non_0_digit()

BOOL8 non_0_digit ( const char *  str,
int  length 
)

◆ reject_blanks()

void reject_blanks ( WERD_RES * word)

Definition at line 178 of file reject.cpp.

178  {
179  inT16 i;
180  inT16 offset;
181 
182  for (i = 0, offset = 0; word->best_choice->unichar_string()[offset] != '\0';
183  offset += word->best_choice->unichar_lengths()[i], i += 1) {
184  if (word->best_choice->unichar_string()[offset] == ' ')
185  //rej unrecognised blobs
186  word->reject_map[i].setrej_tess_failure ();
187  }
188 }
short inT16
Definition: host.h:33
const STRING & unichar_string() const
Definition: ratngs.h:525
WERD_CHOICE * best_choice
Definition: pageres.h:219
const STRING & unichar_lengths() const
Definition: ratngs.h:532
REJMAP reject_map
Definition: pageres.h:271

◆ reject_poor_matches()

void reject_poor_matches ( WERD_RES * word)

Definition at line 207 of file reject.cpp.

207  {
208  float threshold = compute_reject_threshold(word->best_choice);
209  for (int i = 0; i < word->best_choice->length(); ++i) {
210  if (word->best_choice->unichar_id(i) == UNICHAR_SPACE)
211  word->reject_map[i].setrej_tess_failure();
212  else if (word->best_choice->certainty(i) < threshold)
213  word->reject_map[i].setrej_poor_match();
214  }
215 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
float certainty() const
Definition: ratngs.h:328
float compute_reject_threshold(WERD_CHOICE *word)
Definition: reject.cpp:226
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
REJMAP reject_map
Definition: pageres.h:271

◆ word_contains_non_1_digit()

BOOL8 word_contains_non_1_digit ( const char *  word,
const char *  word_lengths 
)