tesseract  3.05.02
WERD_RES Class Reference

#include <pageres.h>

Inheritance diagram for WERD_RES:
ELIST_LINK

Public Member Functions

 WERD_RES ()
 
 WERD_RES (WERD *the_word)
 
 WERD_RES (const WERD_RES &source)
 
 ~WERD_RES ()
 
const char * BestUTF8 (int blob_index, bool in_rtl_context) const
 
const char * RawUTF8 (int blob_index) const
 
UNICHARSET::Direction SymbolDirection (int blob_index) const
 
bool AnyRtlCharsInWord () const
 
bool AnyLtrCharsInWord () const
 
bool UnicharsInReadingOrder () const
 
void InitNonPointers ()
 
void InitPointers ()
 
void Clear ()
 
void ClearResults ()
 
void ClearWordChoices ()
 
void ClearRatings ()
 
WERD_RES & operator= (const WERD_RES &source)
 
void CopySimpleFields (const WERD_RES &source)
 
void InitForRetryRecognition (const WERD_RES &source)
 
bool SetupForRecognition (const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
 
void SetupBasicsFromChoppedWord (const UNICHARSET &unicharset_in)
 
void SetupFake (const UNICHARSET &uch)
 
void SetupWordScript (const UNICHARSET &unicharset_in)
 
void SetupBlamerBundle ()
 
void SetupBlobWidthsAndGaps ()
 
void InsertSeam (int blob_number, SEAM *seam)
 
bool AlternativeChoiceAdjustmentsWorseThan (float threshold) const
 
bool IsAmbiguous ()
 
bool StatesAllValid ()
 
void DebugWordChoices (bool debug, const char *word_to_debug)
 
void DebugTopChoice (const char *msg) const
 
void FilterWordChoices (int debug_level)
 
void ComputeAdaptionThresholds (float certainty_scale, float min_rating, float max_rating, float rating_margin, float *thresholds)
 
bool LogNewRawChoice (WERD_CHOICE *word_choice)
 
bool LogNewCookedChoice (int max_num_choices, bool debug, WERD_CHOICE *word_choice)
 
void PrintBestChoices () const
 
int GetBlobsWidth (int start_blob, int last_blob)
 
int GetBlobsGap (int blob_index)
 
BLOB_CHOICE * GetBlobChoice (int index) const
 
BLOB_CHOICE_LIST * GetBlobChoices (int index) const
 
void ConsumeWordResults (WERD_RES *word)
 
void ReplaceBestChoice (WERD_CHOICE *choice)
 
void RebuildBestState ()
 
void CloneChoppedToRebuild ()
 
void SetupBoxWord ()
 
void SetScriptPositions ()
 
void SetAllScriptPositions (tesseract::ScriptPos position)
 
void FakeClassifyWord (int blob_count, BLOB_CHOICE **choices)
 
void FakeWordFromRatings (PermuterType permuter)
 
void BestChoiceToCorrectText ()
 
bool ConditionalBlobMerge (TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
 
void MergeAdjacentBlobs (int index)
 
UNICHAR_ID BothQuotes (UNICHAR_ID id1, UNICHAR_ID id2)
 
void fix_quotes ()
 
UNICHAR_ID BothHyphens (UNICHAR_ID id1, UNICHAR_ID id2)
 
bool HyphenBoxesOverlap (const TBOX &box1, const TBOX &box2)
 
void fix_hyphens ()
 
UNICHAR_ID BothSpaces (UNICHAR_ID id1, UNICHAR_ID id2)
 
void merge_tess_fails ()
 
void copy_on (WERD_RES *word_res)
 
bool PiecesAllNatural (int start, int count) const
 
- Public Member Functions inherited from ELIST_LINK
 ELIST_LINK ()
 
 ELIST_LINK (const ELIST_LINK &)
 
void operator= (const ELIST_LINK &)
 

Static Public Member Functions

static WERD_RES * deep_copy (const WERD_RES *src)
 

Public Attributes

WERD * word
 
tesseract::BoxWord * bln_boxes
 
ROW * blob_row
 
DENORM denorm
 
const UNICHARSET * uch_set
 
TWERD * chopped_word
 
GenericVector< SEAM * > seam_array
 
GenericVector< int > blob_widths
 
GenericVector< int > blob_gaps
 
MATRIX * ratings
 
WERD_CHOICE * best_choice
 
WERD_CHOICE * raw_choice
 
WERD_CHOICE_LIST best_choices
 
BlamerBundle * blamer_bundle
 
TWERD * rebuild_word
 
tesseract::BoxWord * box_word
 
GenericVector< int > best_state
 
GenericVector< STRING > correct_text
 
tesseract::Tesseract * tesseract
 
WERD_CHOICE * ep_choice
 
REJMAP reject_map
 
BOOL8 tess_failed
 
BOOL8 tess_accepted
 
BOOL8 tess_would_adapt
 
BOOL8 done
 
bool small_caps
 
bool odd_size
 
inT8 italic
 
inT8 bold
 
const FontInfo * fontinfo
 
const FontInfo * fontinfo2
 
inT8 fontinfo_id_count
 
inT8 fontinfo_id2_count
 
BOOL8 guessed_x_ht
 
BOOL8 guessed_caps_ht
 
CRUNCH_MODE unlv_crunch_mode
 
float x_height
 
float caps_height
 
float baseline_shift
 
float space_certainty
 
BOOL8 combination
 
BOOL8 part_of_combo
 
BOOL8 reject_spaces
 

Detailed Description

Definition at line 155 of file pageres.h.

Constructor & Destructor Documentation

◆ WERD_RES() [1/3]

WERD_RES::WERD_RES ( )
inline

Definition at line 322 of file pageres.h.

322  {
323  InitNonPointers();
324  InitPointers();
325  }
void InitPointers()
Definition: pageres.cpp:1116
void InitNonPointers()
Definition: pageres.cpp:1088

◆ WERD_RES() [2/3]

WERD_RES::WERD_RES ( WERD * the_word)
inline

Definition at line 326 of file pageres.h.

326  {
327  InitNonPointers();
328  InitPointers();
329  word = the_word;
330  }
void InitPointers()
Definition: pageres.cpp:1116
WERD * word
Definition: pageres.h:175
void InitNonPointers()
Definition: pageres.cpp:1088

◆ WERD_RES() [3/3]

WERD_RES::WERD_RES ( const WERD_RES & source)
inline

Definition at line 333 of file pageres.h.

333  : ELIST_LINK(source) {
334  InitPointers();
335  *this = source; // see operator=
336  }
ELIST_LINK()
Definition: elst.h:92
void InitPointers()
Definition: pageres.cpp:1116

◆ ~WERD_RES()

WERD_RES::~WERD_RES ( )

Definition at line 1084 of file pageres.cpp.

1084  {
1085  Clear();
1086 }
void Clear()
Definition: pageres.cpp:1131

Member Function Documentation

◆ AlternativeChoiceAdjustmentsWorseThan()

bool WERD_RES::AlternativeChoiceAdjustmentsWorseThan ( float  threshold) const

Definition at line 430 of file pageres.cpp.

430  {
431  // The choices are not changed by this iteration.
432  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
433  for (wc_it.forward(); !wc_it.at_first(); wc_it.forward()) {
434  WERD_CHOICE* choice = wc_it.data();
435  if (choice->adjust_factor() <= threshold)
436  return false;
437  }
438  return true;
439 }
float adjust_factor() const
Definition: ratngs.h:304
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227

◆ AnyLtrCharsInWord()

bool WERD_RES::AnyLtrCharsInWord ( ) const
inline

Definition at line 392 of file pageres.h.

392  {
393  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
394  return false;
395  for (int id = 0; id < best_choice->length(); id++) {
396  int unichar_id = best_choice->unichar_id(id);
397  if (unichar_id < 0 || unichar_id >= uch_set->size())
398  continue; // Ignore illegal chars.
399  UNICHARSET::Direction dir = uch_set->get_direction(unichar_id);
400  if (dir == UNICHARSET::U_LEFT_TO_RIGHT)
401  return true;
402  }
403  return false;
404  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int size() const
Definition: unicharset.h:297
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:638
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ AnyRtlCharsInWord()

bool WERD_RES::AnyRtlCharsInWord ( ) const
inline

Definition at line 375 of file pageres.h.

375  {
376  if (uch_set == NULL || best_choice == NULL || best_choice->length() < 1)
377  return false;
378  for (int id = 0; id < best_choice->length(); id++) {
379  int unichar_id = best_choice->unichar_id(id);
380  if (unichar_id < 0 || unichar_id >= uch_set->size())
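381  continue; // Ignore illegal chars.
382  UNICHARSET::Direction dir =
383  uch_set->get_direction(unichar_id);
384  if (dir == UNICHARSET::U_RIGHT_TO_LEFT ||
385  dir == UNICHARSET::U_RIGHT_TO_LEFT_ARABIC ||
386  dir == UNICHARSET::U_ARABIC_NUMBER)
387  return true;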
388  }
389  return false;
390  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int size() const
Definition: unicharset.h:297
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:638
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ BestChoiceToCorrectText()

void WERD_RES::BestChoiceToCorrectText ( )

Definition at line 917 of file pageres.cpp.

917  {
919  ASSERT_HOST(best_choice != NULL);
920  for (int i = 0; i < best_choice->length(); ++i) {
921  UNICHAR_ID choice_id = best_choice->unichar_id(i);
922  const char* blob_choice = uch_set->id_to_unichar(choice_id);
923  correct_text.push_back(STRING(blob_choice));
924  }
925 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int push_back(T object)
int length() const
Definition: ratngs.h:301
GenericVector< STRING > correct_text
Definition: pageres.h:259
Definition: strngs.h:44
const UNICHARSET * uch_set
Definition: pageres.h:192
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
#define ASSERT_HOST(x)
Definition: errcode.h:84
int UNICHAR_ID
Definition: unichar.h:33

◆ BestUTF8()

const char* WERD_RES::BestUTF8 ( int  blob_index,
bool  in_rtl_context 
) const
inline

Definition at line 345 of file pageres.h.

345  {
346  if (blob_index < 0 || best_choice == NULL ||
347  blob_index >= best_choice->length())
348  return NULL;
349  UNICHAR_ID id = best_choice->unichar_id(blob_index);
350  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
351  return NULL;
352  UNICHAR_ID mirrored = uch_set->get_mirror(id);
353  if (in_rtl_context && mirrored > 0 && mirrored != INVALID_UNICHAR_ID)
354  id = mirrored;
355  return uch_set->id_to_unichar_ext(id);
356  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
int size() const
Definition: unicharset.h:297
UNICHAR_ID get_mirror(UNICHAR_ID unichar_id) const
Definition: unicharset.h:645
const char * id_to_unichar_ext(UNICHAR_ID id) const
Definition: unicharset.cpp:274
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
const UNICHARSET * uch_set
Definition: pageres.h:192
int UNICHAR_ID
Definition: unichar.h:33

◆ BothHyphens()

UNICHAR_ID WERD_RES::BothHyphens ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1024 of file pageres.cpp.

1024  {
1025  const char *ch = uch_set->id_to_unichar(id1);
1026  const char *next_ch = uch_set->id_to_unichar(id2);
1027  if (strlen(ch) == 1 && strlen(next_ch) == 1 &&
1028  (*ch == '-' || *ch == '~') && (*next_ch == '-' || *next_ch == '~'))
1029  return uch_set->unichar_to_id("-");
1030  return INVALID_UNICHAR_ID;
1031 }
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266

◆ BothQuotes()

UNICHAR_ID WERD_RES::BothQuotes ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1002 of file pageres.cpp.

1002  {
1003  const char *ch = uch_set->id_to_unichar(id1);
1004  const char *next_ch = uch_set->id_to_unichar(id2);
1005  if (is_simple_quote(ch, strlen(ch)) &&
1006  is_simple_quote(next_ch, strlen(next_ch)))
1007  return uch_set->unichar_to_id("\"");
1008  return INVALID_UNICHAR_ID;
1009 }
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266

◆ BothSpaces()

UNICHAR_ID WERD_RES::BothSpaces ( UNICHAR_ID  id1,
UNICHAR_ID  id2 
)

Definition at line 1053 of file pageres.cpp.

1053  {
1054  if (id1 == id2 && id1 == uch_set->unichar_to_id(" "))
1055  return id1;
1056  else
1057  return INVALID_UNICHAR_ID;
1058 }
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ Clear()

void WERD_RES::Clear ( )

Definition at line 1131 of file pageres.cpp.

1131  {
1132  if (word != NULL && combination) {
1133  delete word;
1134  }
1135  word = NULL;
1136  delete blamer_bundle;
1137  blamer_bundle = NULL;
1138  ClearResults();
1139 }
BOOL8 combination
Definition: pageres.h:318
void ClearResults()
Definition: pageres.cpp:1141
BlamerBundle * blamer_bundle
Definition: pageres.h:230
WERD * word
Definition: pageres.h:175

◆ ClearRatings()

void WERD_RES::ClearRatings ( )

Definition at line 1186 of file pageres.cpp.

1186  {
1187  if (ratings != NULL) {
1188  ratings->delete_matrix_pointers();
1189  delete ratings;
1190  ratings = NULL;
1191  }
1192 }
MATRIX * ratings
Definition: pageres.h:215
void delete_matrix_pointers()
Definition: matrix.h:452

◆ ClearResults()

void WERD_RES::ClearResults ( )

Definition at line 1141 of file pageres.cpp.

1141  {
1142  done = false;
1143  fontinfo = NULL;
1144  fontinfo2 = NULL;
1145  fontinfo_id_count = 0;
1146  fontinfo_id2_count = 0;
1147  if (bln_boxes != NULL) {
1148  delete bln_boxes;
1149  bln_boxes = NULL;
1150  }
1151  blob_row = NULL;
1152  if (chopped_word != NULL) {
1153  delete chopped_word;
1154  chopped_word = NULL;
1155  }
1156  if (rebuild_word != NULL) {
1157  delete rebuild_word;
1158  rebuild_word = NULL;
1159  }
1160  if (box_word != NULL) {
1161  delete box_word;
1162  box_word = NULL;
1163  }
1164  best_state.clear();
1165  correct_text.clear();
1166  seam_array.delete_data_pointers();
1167  seam_array.clear();
1168  blob_widths.clear();
1169  blob_gaps.clear();
1170  ClearRatings();
1171  ClearWordChoices();
1172  if (blamer_bundle != NULL) blamer_bundle->ClearResults();
1173 }
TWERD * rebuild_word
Definition: pageres.h:244
void ClearRatings()
Definition: pageres.cpp:1186
const FontInfo * fontinfo2
Definition: pageres.h:289
inT8 fontinfo_id2_count
Definition: pageres.h:291
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
tesseract::BoxWord * box_word
Definition: pageres.h:250
const FontInfo * fontinfo
Definition: pageres.h:288
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
GenericVector< int > blob_gaps
Definition: pageres.h:208
ROW * blob_row
Definition: pageres.h:186
BlamerBundle * blamer_bundle
Definition: pageres.h:230
TWERD * chopped_word
Definition: pageres.h:201
void delete_data_pointers()
GenericVector< STRING > correct_text
Definition: pageres.h:259
void ClearWordChoices()
Definition: pageres.cpp:1174
void ClearResults()
Definition: blamer.h:173
GenericVector< int > blob_widths
Definition: pageres.h:205
inT8 fontinfo_id_count
Definition: pageres.h:290
GenericVector< int > best_state
Definition: pageres.h:255
BOOL8 done
Definition: pageres.h:282

◆ ClearWordChoices()

void WERD_RES::ClearWordChoices ( )

Definition at line 1174 of file pageres.cpp.

1174  {
1175  best_choice = NULL;
1176  if (raw_choice != NULL) {
1177  delete raw_choice;
1178  raw_choice = NULL;
1179  }
1180  best_choices.clear();
1181  if (ep_choice != NULL) {
1182  delete ep_choice;
1183  ep_choice = NULL;
1184  }
1185 }
WERD_CHOICE * raw_choice
Definition: pageres.h:224
WERD_CHOICE * best_choice
Definition: pageres.h:219
WERD_CHOICE * ep_choice
Definition: pageres.h:270
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227

◆ CloneChoppedToRebuild()

void WERD_RES::CloneChoppedToRebuild ( )

Definition at line 828 of file pageres.cpp.

828  {
829  if (rebuild_word != NULL)
830  delete rebuild_word;
832  SetupBoxWord();
833  int word_len = box_word->length();
834  best_state.reserve(word_len);
835  correct_text.reserve(word_len);
836  for (int i = 0; i < word_len; ++i) {
839  }
840 }
TWERD * rebuild_word
Definition: pageres.h:244
int length() const
Definition: boxword.h:85
int push_back(T object)
tesseract::BoxWord * box_word
Definition: pageres.h:250
void SetupBoxWord()
Definition: pageres.cpp:843
TWERD * chopped_word
Definition: pageres.h:201
Definition: blobs.h:395
GenericVector< STRING > correct_text
Definition: pageres.h:259
Definition: strngs.h:44
void reserve(int size)
GenericVector< int > best_state
Definition: pageres.h:255

◆ ComputeAdaptionThresholds()

void WERD_RES::ComputeAdaptionThresholds ( float  certainty_scale,
float  min_rating,
float  max_rating,
float  rating_margin,
float *  thresholds 
)

Definition at line 553 of file pageres.cpp.

557  {
558  int chunk = 0;
559  int end_chunk = best_choice->state(0);
560  int end_raw_chunk = raw_choice->state(0);
561  int raw_blob = 0;
562  for (int i = 0; i < best_choice->length(); i++, thresholds++) {
563  float avg_rating = 0.0f;
564  int num_error_chunks = 0;
565 
566  // For each chunk in best choice blob i, count non-matching raw results.
567  while (chunk < end_chunk) {
568  if (chunk >= end_raw_chunk) {
569  ++raw_blob;
570  end_raw_chunk += raw_choice->state(raw_blob);
571  }
572  if (best_choice->unichar_id(i) !=
573  raw_choice->unichar_id(raw_blob)) {
574  avg_rating += raw_choice->certainty(raw_blob);
575  ++num_error_chunks;
576  }
577  ++chunk;
578  }
579 
580  if (num_error_chunks > 0) {
581  avg_rating /= num_error_chunks;
582  *thresholds = (avg_rating / -certainty_scale) * (1.0 - rating_margin);
583  } else {
584  *thresholds = max_rating;
585  }
586 
587  if (*thresholds > max_rating)
588  *thresholds = max_rating;
589  if (*thresholds < min_rating)
590  *thresholds = min_rating;
591  }
592 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * raw_choice
Definition: pageres.h:224
int state(int index) const
Definition: ratngs.h:317
float certainty() const
Definition: ratngs.h:328
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301

◆ ConditionalBlobMerge()

bool WERD_RES::ConditionalBlobMerge ( TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *  class_cb,
TessResultCallback2< bool, const TBOX &, const TBOX &> *  box_cb 
)

Definition at line 932 of file pageres.cpp.

934  {
935  ASSERT_HOST(best_choice->length() == 0 || ratings != NULL);
936  bool modified = false;
937  for (int i = 0; i + 1 < best_choice->length(); ++i) {
938  UNICHAR_ID new_id = class_cb->Run(best_choice->unichar_id(i),
939  best_choice->unichar_id(i+1));
940  if (new_id != INVALID_UNICHAR_ID &&
941  (box_cb == NULL || box_cb->Run(box_word->BlobBox(i),
942  box_word->BlobBox(i + 1)))) {
943  // Raw choice should not be fixed.
944  best_choice->set_unichar_id(new_id, i);
945  modified = true;
946  MergeAdjacentBlobs(i);
947  const MATRIX_COORD& coord = best_choice->MatrixCoord(i);
948  if (!coord.Valid(*ratings)) {
949  ratings->IncreaseBandSize(coord.row + 1 - coord.col);
950  }
951  BLOB_CHOICE_LIST* blob_choices = GetBlobChoices(i);
952  if (FindMatchingChoice(new_id, blob_choices) == NULL) {
953  // Insert a fake result.
954  BLOB_CHOICE* blob_choice = new BLOB_CHOICE;
955  blob_choice->set_unichar_id(new_id);
956  BLOB_CHOICE_IT bc_it(blob_choices);
957  bc_it.add_before_then_move(blob_choice);
958  }
959  }
960  }
961  delete class_cb;
962  delete box_cb;
963  return modified;
964 }
void IncreaseBandSize(int bandwidth)
Definition: matrix.cpp:49
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
void set_unichar_id(UNICHAR_ID unichar_id, int index)
Definition: ratngs.h:357
void MergeAdjacentBlobs(int index)
Definition: pageres.cpp:968
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool Valid(const MATRIX &m) const
Definition: matrix.h:610
tesseract::BoxWord * box_word
Definition: pageres.h:250
virtual R Run(A1, A2)=0
MATRIX * ratings
Definition: pageres.h:215
MATRIX_COORD MatrixCoord(int index) const
Definition: ratngs.cpp:280
void set_unichar_id(UNICHAR_ID newunichar_id)
Definition: ratngs.h:144
int length() const
Definition: ratngs.h:301
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751
const TBOX & BlobBox(int index) const
Definition: boxword.h:86
#define ASSERT_HOST(x)
Definition: errcode.h:84
int UNICHAR_ID
Definition: unichar.h:33

◆ ConsumeWordResults()

void WERD_RES::ConsumeWordResults ( WERD_RES * word)

Definition at line 757 of file pageres.cpp.

757  {
758  denorm = word->denorm;
759  blob_row = word->blob_row;
760  MovePointerData(&chopped_word, &word->chopped_word);
761  MovePointerData(&rebuild_word, &word->rebuild_word);
762  MovePointerData(&box_word, &word->box_word);
763  seam_array.delete_data_pointers();
764  seam_array = word->seam_array;
765  word->seam_array.clear();
766  best_state.move(&word->best_state);
767  correct_text.move(&word->correct_text);
768  blob_widths.move(&word->blob_widths);
769  blob_gaps.move(&word->blob_gaps);
770  if (ratings != NULL) ratings->delete_matrix_pointers();
771  MovePointerData(&ratings, &word->ratings);
772  best_choice = word->best_choice;
773  MovePointerData(&raw_choice, &word->raw_choice);
774  best_choices.clear();
775  WERD_CHOICE_IT wc_it(&best_choices);
776  wc_it.add_list_after(&word->best_choices);
777  reject_map = word->reject_map;
778  if (word->blamer_bundle != NULL) {
779  assert(blamer_bundle != NULL);
780  blamer_bundle->CopyResults(*(word->blamer_bundle));
781  }
782  CopySimpleFields(*word);
783 }
TWERD * rebuild_word
Definition: pageres.h:244
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void move(GenericVector< T > *from)
WERD_CHOICE * best_choice
Definition: pageres.h:219
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
tesseract::BoxWord * box_word
Definition: pageres.h:250
GenericVector< int > blob_gaps
Definition: pageres.h:208
MATRIX * ratings
Definition: pageres.h:215
ROW * blob_row
Definition: pageres.h:186
BlamerBundle * blamer_bundle
Definition: pageres.h:230
TWERD * chopped_word
Definition: pageres.h:201
void delete_data_pointers()
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
GenericVector< STRING > correct_text
Definition: pageres.h:259
void CopyResults(const BlamerBundle &other)
Definition: blamer.h:194
void delete_matrix_pointers()
Definition: matrix.h:452
GenericVector< int > blob_widths
Definition: pageres.h:205
WERD * word
Definition: pageres.h:175
GenericVector< int > best_state
Definition: pageres.h:255
DENORM denorm
Definition: pageres.h:190
REJMAP reject_map
Definition: pageres.h:271

◆ copy_on()

void WERD_RES::copy_on ( WERD_RES * word_res)
inline

Definition at line 644 of file pageres.h.

644  { //from this word
645  word->set_flag(W_BOL, word->flag(W_BOL) || word_res->word->flag(W_BOL));
646  word->set_flag(W_EOL, word->flag(W_EOL) || word_res->word->flag(W_EOL));
647  word->copy_on(word_res->word);
648  }
void copy_on(WERD *other)
Definition: werd.cpp:234
Definition: werd.h:36
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
Definition: werd.h:35
WERD * word
Definition: pageres.h:175
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129

◆ CopySimpleFields()

void WERD_RES::CopySimpleFields ( const WERD_RES & source)

Definition at line 241 of file pageres.cpp.

241  {
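242  tess_failed = source.tess_failed;
243  tess_accepted = source.tess_accepted;
244  tess_would_adapt = source.tess_would_adapt;
245  done = source.done;
246  unlv_crunch_mode = source.unlv_crunch_mode;
247  small_caps = source.small_caps;
248  odd_size = source.odd_size;
249  italic = source.italic;
250  bold = source.bold;
251  fontinfo = source.fontinfo;
252  fontinfo2 = source.fontinfo2;
253  fontinfo_id_count = source.fontinfo_id_count;
254  fontinfo_id2_count = source.fontinfo_id2_count;
255  x_height = source.x_height;
256  caps_height = source.caps_height;
257  baseline_shift = source.baseline_shift;
258  guessed_x_ht = source.guessed_x_ht;
259  guessed_caps_ht = source.guessed_caps_ht;
260  reject_spaces = source.reject_spaces;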
261  uch_set = source.uch_set;
262  tesseract = source.tesseract;
263 }
BOOL8 tess_accepted
Definition: pageres.h:280
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
BOOL8 tess_would_adapt
Definition: pageres.h:281
const FontInfo * fontinfo2
Definition: pageres.h:289
BOOL8 guessed_x_ht
Definition: pageres.h:292
inT8 fontinfo_id2_count
Definition: pageres.h:291
float x_height
Definition: pageres.h:295
BOOL8 tess_failed
Definition: pageres.h:272
bool odd_size
Definition: pageres.h:284
const FontInfo * fontinfo
Definition: pageres.h:288
BOOL8 guessed_caps_ht
Definition: pageres.h:293
float caps_height
Definition: pageres.h:296
inT8 italic
Definition: pageres.h:285
bool small_caps
Definition: pageres.h:283
const UNICHARSET * uch_set
Definition: pageres.h:192
inT8 bold
Definition: pageres.h:286
tesseract::Tesseract * tesseract
Definition: pageres.h:266
float baseline_shift
Definition: pageres.h:297
BOOL8 reject_spaces
Definition: pageres.h:320
inT8 fontinfo_id_count
Definition: pageres.h:290
BOOL8 done
Definition: pageres.h:282

◆ DebugTopChoice()

void WERD_RES::DebugTopChoice ( const char *  msg) const

Definition at line 490 of file pageres.cpp.

490  {
491  tprintf("Best choice: accepted=%d, adaptable=%d, done=%d : ",
492  tess_accepted, tess_would_adapt, done);
493  if (best_choice == NULL)
494  tprintf("<Null choice>\n");
495  else
496  best_choice->print(msg);
497 }
BOOL8 tess_accepted
Definition: pageres.h:280
BOOL8 tess_would_adapt
Definition: pageres.h:281
void print() const
Definition: ratngs.h:564
WERD_CHOICE * best_choice
Definition: pageres.h:219
#define tprintf(...)
Definition: tprintf.h:31
BOOL8 done
Definition: pageres.h:282

◆ DebugWordChoices()

void WERD_RES::DebugWordChoices ( bool  debug,
const char *  word_to_debug 
)

Definition at line 471 of file pageres.cpp.

471  {
472  if (debug ||
473  (word_to_debug != NULL && *word_to_debug != '\0' && best_choice != NULL &&
474  best_choice->unichar_string() == STRING(word_to_debug))) {
475  if (raw_choice != NULL)
476  raw_choice->print("\nBest Raw Choice");
477 
478  WERD_CHOICE_IT it(&best_choices);
479  int index = 0;
480  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
481  WERD_CHOICE* choice = it.data();
482  STRING label;
483  label.add_str_int("\nCooked Choice #", index);
484  choice->print(label.string());
485  }
486  }
487 }
const STRING & unichar_string() const
Definition: ratngs.h:525
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void print() const
Definition: ratngs.h:564
WERD_CHOICE * best_choice
Definition: pageres.h:219
void add_str_int(const char *str, int number)
Definition: strngs.cpp:384
const char * string() const
Definition: strngs.cpp:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
Definition: strngs.h:44

◆ deep_copy()

static WERD_RES* WERD_RES::deep_copy ( const WERD_RES * src)
inline static

Definition at line 633 of file pageres.h.

633  {
634  WERD_RES* result = new WERD_RES(*src);
635  // That didn't copy the ratings, but we want a copy if there is one to
636  // begin with.
637  if (src->ratings != NULL)
638  result->ratings = src->ratings->DeepCopy();
639  return result;
640  }
WERD_RES()
Definition: pageres.h:322
MATRIX * ratings
Definition: pageres.h:215
MATRIX * DeepCopy() const
Definition: matrix.cpp:94

◆ FakeClassifyWord()

void WERD_RES::FakeClassifyWord ( int  blob_count,
BLOB_CHOICE **  choices 
)

Definition at line 872 of file pageres.cpp.

872  {
873  // Setup the WERD_RES.
874  ASSERT_HOST(box_word != NULL);
875  ASSERT_HOST(blob_count == box_word->length());
877  ClearRatings();
878  ratings = new MATRIX(blob_count, 1);
879  for (int c = 0; c < blob_count; ++c) {
880  BLOB_CHOICE_LIST* choice_list = new BLOB_CHOICE_LIST;
881  BLOB_CHOICE_IT choice_it(choice_list);
882  choice_it.add_after_then_move(choices[c]);
883  ratings->put(c, c, choice_list);
884  }
886  reject_map.initialise(blob_count);
887  done = true;
888 }
void initialise(inT16 length)
Definition: rejctmap.cpp:318
void ClearRatings()
Definition: pageres.cpp:1186
Definition: matrix.h:572
int length() const
Definition: boxword.h:85
tesseract::BoxWord * box_word
Definition: pageres.h:250
void put(ICOORD pos, const T &thing)
Definition: matrix.h:220
MATRIX * ratings
Definition: pageres.h:215
void ClearWordChoices()
Definition: pageres.cpp:1174
BOOL8 done
Definition: pageres.h:282
void FakeWordFromRatings(PermuterType permuter)
Definition: pageres.cpp:892
#define ASSERT_HOST(x)
Definition: errcode.h:84
REJMAP reject_map
Definition: pageres.h:271

◆ FakeWordFromRatings()

void WERD_RES::FakeWordFromRatings ( PermuterType  permuter)

Definition at line 892 of file pageres.cpp.

892  {
893  int num_blobs = ratings->dimension();
894  WERD_CHOICE* word_choice = new WERD_CHOICE(uch_set, num_blobs);
895  word_choice->set_permuter(permuter);
896  for (int b = 0; b < num_blobs; ++b) {
897  UNICHAR_ID unichar_id = UNICHAR_SPACE;
898  float rating = MAX_INT32;
899  float certainty = -MAX_INT32;
900  BLOB_CHOICE_LIST* choices = ratings->get(b, b);
901  if (choices != NULL && !choices->empty()) {
902  BLOB_CHOICE_IT bc_it(choices);
903  BLOB_CHOICE* choice = bc_it.data();
904  unichar_id = choice->unichar_id();
905  rating = choice->rating();
906  certainty = choice->certainty();
907  }
908  word_choice->append_unichar_id_space_allocated(unichar_id, 1, rating,
909  certainty);
910  }
911  LogNewRawChoice(word_choice);
912  // Ownership of word_choice taken by word here.
913  LogNewCookedChoice(1, false, word_choice);
914 }
int dimension() const
Definition: matrix.h:530
T get(ICOORD pos) const
Definition: matrix.h:228
void set_permuter(uinT8 perm)
Definition: ratngs.h:373
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:612
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76
float rating() const
Definition: ratngs.h:79
MATRIX * ratings
Definition: pageres.h:215
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:596
#define MAX_INT32
Definition: host.h:53
float certainty() const
Definition: ratngs.h:82
const UNICHARSET * uch_set
Definition: pageres.h:192
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
Definition: ratngs.h:450
int UNICHAR_ID
Definition: unichar.h:33

◆ FilterWordChoices()

void WERD_RES::FilterWordChoices ( int  debug_level)

Definition at line 504 of file pageres.cpp.

504  {
505  if (best_choice == NULL || best_choices.singleton())
506  return;
507 
508  if (debug_level >= 2)
509  best_choice->print("\nFiltering against best choice");
510  WERD_CHOICE_IT it(&best_choices);
511  int index = 0;
512  for (it.forward(); !it.at_first(); it.forward(), ++index) {
513  WERD_CHOICE* choice = it.data();
514  float threshold = StopperAmbigThreshold(best_choice->adjust_factor(),
515  choice->adjust_factor());
516  // i, j index the blob choice in choice, best_choice.
517  // chunk is an index into the chopped_word blobs (AKA chunks).
518  // Since the two words may use different segmentations of the chunks, we
519  // iterate over the chunks to find out whether a comparable blob
520  // classification is much worse than the best result.
521  int i = 0, j = 0, chunk = 0;
522  // Each iteration of the while deals with 1 chunk. On entry choice_chunk
523  // and best_chunk are the indices of the first chunk in the NEXT blob,
524  // i.e. we don't have to increment i, j while chunk < choice_chunk and
525  // best_chunk respectively.
526  int choice_chunk = choice->state(0), best_chunk = best_choice->state(0);
527  while (i < choice->length() && j < best_choice->length()) {
528  if (choice->unichar_id(i) != best_choice->unichar_id(j) &&
529  choice->certainty(i) - best_choice->certainty(j) < threshold) {
530  if (debug_level >= 2) {
531  STRING label;
532  label.add_str_int("\nDiscarding bad choice #", index);
533  choice->print(label.string());
534  tprintf("i %d j %d Chunk %d Choice->Blob[i].Certainty %.4g"
535  " BestChoice->ChunkCertainty[Chunk] %g Threshold %g\n",
536  i, j, chunk, choice->certainty(i),
537  best_choice->certainty(j), threshold);
538  }
539  delete it.extract();
540  break;
541  }
542  ++chunk;
543  // If needed, advance choice_chunk to keep up with chunk.
544  while (choice_chunk < chunk && ++i < choice->length())
545  choice_chunk += choice->state(i);
546  // If needed, advance best_chunk to keep up with chunk.
547  while (best_chunk < chunk && ++j < best_choice->length())
548  best_chunk += best_choice->state(j);
549  }
550  }
551 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
void print() const
Definition: ratngs.h:564
float adjust_factor() const
Definition: ratngs.h:304
int state(int index) const
Definition: ratngs.h:317
float certainty() const
Definition: ratngs.h:328
WERD_CHOICE * best_choice
Definition: pageres.h:219
void add_str_int(const char *str, int number)
Definition: strngs.cpp:384
const char * string() const
Definition: strngs.cpp:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
#define tprintf(...)
Definition: tprintf.h:31
Definition: strngs.h:44

◆ fix_hyphens()

void WERD_RES::fix_hyphens ( )

Definition at line 1041 of file pageres.cpp.

1041  {
1042  if (!uch_set->contains_unichar("-") ||
1044  return; // Don't create it if it is disallowed.
1045 
1049 }
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:932
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool HyphenBoxesOverlap(const TBOX &box1, const TBOX &box2)
Definition: pageres.cpp:1035
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192
UNICHAR_ID BothHyphens(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1024
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826

◆ fix_quotes()

void WERD_RES::fix_quotes ( )

Definition at line 1012 of file pageres.cpp.

1012  {
1013  if (!uch_set->contains_unichar("\"") ||
1015  return; // Don't create it if it is disallowed.
1016 
1019  NULL);
1020 }
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:932
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
UNICHAR_ID BothQuotes(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1002
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
const UNICHARSET * uch_set
Definition: pageres.h:192
bool get_enabled(UNICHAR_ID unichar_id) const
Definition: unicharset.h:826

◆ GetBlobChoice()

BLOB_CHOICE * WERD_RES::GetBlobChoice ( int  index) const

Definition at line 742 of file pageres.cpp.

742  {
743  if (index < 0 || index >= best_choice->length()) return NULL;
744  BLOB_CHOICE_LIST* choices = GetBlobChoices(index);
745  return FindMatchingChoice(best_choice->unichar_id(index), choices);
746 }
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
Definition: ratngs.cpp:160
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
BLOB_CHOICE_LIST * GetBlobChoices(int index) const
Definition: pageres.cpp:751

◆ GetBlobChoices()

BLOB_CHOICE_LIST * WERD_RES::GetBlobChoices ( int  index) const

Definition at line 751 of file pageres.cpp.

751  {
752  return best_choice->blob_choices(index, ratings);
753 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
MATRIX * ratings
Definition: pageres.h:215
BLOB_CHOICE_LIST * blob_choices(int index, MATRIX *ratings) const
Definition: ratngs.cpp:268

◆ GetBlobsGap()

int WERD_RES::GetBlobsGap ( int  blob_index)

Definition at line 732 of file pageres.cpp.

732  {
733  if (blob_index < 0 || blob_index >= blob_gaps.size())
734  return 0;
735  return blob_gaps[blob_index];
736 }
GenericVector< int > blob_gaps
Definition: pageres.h:208
int size() const
Definition: genericvector.h:72

◆ GetBlobsWidth()

int WERD_RES::GetBlobsWidth ( int  start_blob,
int  last_blob 
)

Definition at line 722 of file pageres.cpp.

722  {
723  int result = 0;
724  for (int b = start_blob; b <= last_blob; ++b) {
725  result += blob_widths[b];
726  if (b < last_blob)
727  result += blob_gaps[b];
728  }
729  return result;
730 }
GenericVector< int > blob_gaps
Definition: pageres.h:208
GenericVector< int > blob_widths
Definition: pageres.h:205

◆ HyphenBoxesOverlap()

bool WERD_RES::HyphenBoxesOverlap ( const TBOX & box1,
const TBOX & box2 
)

Definition at line 1035 of file pageres.cpp.

1035  {
1036  return box1.right() >= box2.left();
1037 }
inT16 left() const
Definition: rect.h:68
inT16 right() const
Definition: rect.h:75

◆ InitForRetryRecognition()

void WERD_RES::InitForRetryRecognition ( const WERD_RES & source)

Definition at line 269 of file pageres.cpp.

269  {
270  word = source.word;
271  CopySimpleFields(source);
272  if (source.blamer_bundle != NULL) {
273  blamer_bundle = new BlamerBundle();
275  }
276 }
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
BlamerBundle * blamer_bundle
Definition: pageres.h:230
void CopyTruth(const BlamerBundle &other)
Definition: blamer.h:187
WERD * word
Definition: pageres.h:175

◆ InitNonPointers()

void WERD_RES::InitNonPointers ( )

Definition at line 1088 of file pageres.cpp.

1088  {
1089  tess_failed = FALSE;
1090  tess_accepted = FALSE;
1092  done = FALSE;
1094  small_caps = false;
1095  odd_size = false;
1096  italic = FALSE;
1097  bold = FALSE;
1098  // The fontinfos and tesseract count as non-pointers as they point to
1099  // data owned elsewhere.
1100  fontinfo = NULL;
1101  fontinfo2 = NULL;
1102  tesseract = NULL;
1103  fontinfo_id_count = 0;
1104  fontinfo_id2_count = 0;
1105  x_height = 0.0;
1106  caps_height = 0.0;
1107  baseline_shift = 0.0f;
1108  space_certainty = 0.0f;
1109  guessed_x_ht = TRUE;
1111  combination = FALSE;
1112  part_of_combo = FALSE;
1113  reject_spaces = FALSE;
1114 }
float space_certainty
Definition: pageres.h:300
BOOL8 combination
Definition: pageres.h:318
BOOL8 tess_accepted
Definition: pageres.h:280
CRUNCH_MODE unlv_crunch_mode
Definition: pageres.h:294
#define TRUE
Definition: capi.h:45
BOOL8 tess_would_adapt
Definition: pageres.h:281
const FontInfo * fontinfo2
Definition: pageres.h:289
BOOL8 guessed_x_ht
Definition: pageres.h:292
inT8 fontinfo_id2_count
Definition: pageres.h:291
float x_height
Definition: pageres.h:295
BOOL8 tess_failed
Definition: pageres.h:272
bool odd_size
Definition: pageres.h:284
BOOL8 part_of_combo
Definition: pageres.h:319
const FontInfo * fontinfo
Definition: pageres.h:288
#define FALSE
Definition: capi.h:46
BOOL8 guessed_caps_ht
Definition: pageres.h:293
float caps_height
Definition: pageres.h:296
inT8 italic
Definition: pageres.h:285
bool small_caps
Definition: pageres.h:283
inT8 bold
Definition: pageres.h:286
float baseline_shift
Definition: pageres.h:297
BOOL8 reject_spaces
Definition: pageres.h:320
inT8 fontinfo_id_count
Definition: pageres.h:290
BOOL8 done
Definition: pageres.h:282

◆ InitPointers()

void WERD_RES::InitPointers ( )

Definition at line 1116 of file pageres.cpp.

1116  {
1117  word = NULL;
1118  bln_boxes = NULL;
1119  blob_row = NULL;
1120  uch_set = NULL;
1121  chopped_word = NULL;
1122  rebuild_word = NULL;
1123  box_word = NULL;
1124  ratings = NULL;
1125  best_choice = NULL;
1126  raw_choice = NULL;
1127  ep_choice = NULL;
1128  blamer_bundle = NULL;
1129 }
TWERD * rebuild_word
Definition: pageres.h:244
WERD_CHOICE * raw_choice
Definition: pageres.h:224
WERD_CHOICE * best_choice
Definition: pageres.h:219
tesseract::BoxWord * box_word
Definition: pageres.h:250
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
MATRIX * ratings
Definition: pageres.h:215
ROW * blob_row
Definition: pageres.h:186
BlamerBundle * blamer_bundle
Definition: pageres.h:230
WERD_CHOICE * ep_choice
Definition: pageres.h:270
TWERD * chopped_word
Definition: pageres.h:201
const UNICHARSET * uch_set
Definition: pageres.h:192
WERD * word
Definition: pageres.h:175

◆ InsertSeam()

void WERD_RES::InsertSeam ( int  blob_number,
SEAM * seam 
)

Definition at line 409 of file pageres.cpp.

409  {
410  // Insert the seam into the SEAMS array.
411  seam->PrepareToInsertSeam(seam_array, chopped_word->blobs, blob_number, true);
412  seam_array.insert(seam, blob_number);
413  if (ratings != NULL) {
414  // Expand the ratings matrix.
415  ratings = ratings->ConsumeAndMakeBigger(blob_number);
416  // Fix all the segmentation states.
417  if (raw_choice != NULL)
418  raw_choice->UpdateStateForSplit(blob_number);
419  WERD_CHOICE_IT wc_it(&best_choices);
420  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
421  WERD_CHOICE* choice = wc_it.data();
422  choice->UpdateStateForSplit(blob_number);
423  }
425  }
426 }
void UpdateStateForSplit(int blob_position)
Definition: ratngs.cpp:685
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void insert(T t, int index)
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:391
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
MATRIX * ConsumeAndMakeBigger(int ind)
Definition: matrix.cpp:58
MATRIX * ratings
Definition: pageres.h:215
TWERD * chopped_word
Definition: pageres.h:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
Definition: seam.cpp:82

◆ IsAmbiguous()

bool WERD_RES::IsAmbiguous ( )

Definition at line 443 of file pageres.cpp.

443  {
444  return !best_choices.singleton() || best_choice->dangerous_ambig_found();
445 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool dangerous_ambig_found() const
Definition: ratngs.h:361
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227

◆ LogNewCookedChoice()

bool WERD_RES::LogNewCookedChoice ( int  max_num_choices,
bool  debug,
WERD_CHOICE * word_choice 
)

Definition at line 612 of file pageres.cpp.

613  {
614  if (best_choice != NULL) {
615  // Throw out obviously bad choices to save some work.
616  // TODO(rays) Get rid of this! This piece of code produces different
617  // results according to the order in which words are found, which is an
618  // undesirable behavior. It would be better to keep all the choices and
619  // prune them later when more information is available.
620  float max_certainty_delta =
621  StopperAmbigThreshold(best_choice->adjust_factor(),
622  word_choice->adjust_factor());
623  if (max_certainty_delta > -kStopperAmbiguityThresholdOffset)
624  max_certainty_delta = -kStopperAmbiguityThresholdOffset;
625  if (word_choice->certainty() - best_choice->certainty() <
626  max_certainty_delta) {
627  if (debug) {
628  STRING bad_string;
629  word_choice->string_and_lengths(&bad_string, NULL);
630  tprintf("Discarding choice \"%s\" with an overly low certainty"
631  " %.3f vs best choice certainty %.3f (Threshold: %.3f)\n",
632  bad_string.string(), word_choice->certainty(),
634  max_certainty_delta + best_choice->certainty());
635  }
636  delete word_choice;
637  return false;
638  }
639  }
640 
641  // Insert in the list in order of increasing rating, but knock out worse
642  // string duplicates.
643  WERD_CHOICE_IT it(&best_choices);
644  const STRING& new_str = word_choice->unichar_string();
645  bool inserted = false;
646  int num_choices = 0;
647  if (!it.empty()) {
648  do {
649  WERD_CHOICE* choice = it.data();
650  if (choice->rating() > word_choice->rating() && !inserted) {
651  // Time to insert.
652  it.add_before_stay_put(word_choice);
653  inserted = true;
654  if (num_choices == 0)
655  best_choice = word_choice; // This is the new best.
656  ++num_choices;
657  }
658  if (choice->unichar_string() == new_str) {
659  if (inserted) {
660  // New is better.
661  delete it.extract();
662  } else {
663  // Old is better.
664  if (debug) {
665  tprintf("Discarding duplicate choice \"%s\", rating %g vs %g\n",
666  new_str.string(), word_choice->rating(), choice->rating());
667  }
668  delete word_choice;
669  return false;
670  }
671  } else {
672  ++num_choices;
673  if (num_choices > max_num_choices)
674  delete it.extract();
675  }
676  it.forward();
677  } while (!it.at_first());
678  }
679  if (!inserted && num_choices < max_num_choices) {
680  it.add_to_end(word_choice);
681  inserted = true;
682  if (num_choices == 0)
683  best_choice = word_choice; // This is the new best.
684  }
685  if (debug) {
686  if (inserted)
687  tprintf("New %s", best_choice == word_choice ? "Best" : "Secondary");
688  else
689  tprintf("Poor");
690  word_choice->print(" Word Choice");
691  }
692  if (!inserted) {
693  delete word_choice;
694  return false;
695  }
696  return true;
697 }
void string_and_lengths(STRING *word_str, STRING *word_lengths_str) const
Definition: ratngs.cpp:427
const STRING & unichar_string() const
Definition: ratngs.h:525
void print() const
Definition: ratngs.h:564
float adjust_factor() const
Definition: ratngs.h:304
float certainty() const
Definition: ratngs.h:328
WERD_CHOICE * best_choice
Definition: pageres.h:219
const char * string() const
Definition: strngs.cpp:201
float rating() const
Definition: ratngs.h:325
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
#define tprintf(...)
Definition: tprintf.h:31
Definition: strngs.h:44

◆ LogNewRawChoice()

bool WERD_RES::LogNewRawChoice ( WERD_CHOICE * word_choice)

Definition at line 596 of file pageres.cpp.

596  {
597  if (raw_choice == NULL || word_choice->rating() < raw_choice->rating()) {
598  delete raw_choice;
599  raw_choice = new WERD_CHOICE(*word_choice);
601  return true;
602  }
603  return false;
604 }
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void set_permuter(uinT8 perm)
Definition: ratngs.h:373
float rating() const
Definition: ratngs.h:325

◆ merge_tess_fails()

void WERD_RES::merge_tess_fails ( )

Definition at line 1061 of file pageres.cpp.

1061  {
1064  int len = best_choice->length();
1065  ASSERT_HOST(reject_map.length() == len);
1066  ASSERT_HOST(box_word->length() == len);
1067  }
1068 }
bool ConditionalBlobMerge(TessResultCallback2< UNICHAR_ID, UNICHAR_ID, UNICHAR_ID > *class_cb, TessResultCallback2< bool, const TBOX &, const TBOX &> *box_cb)
Definition: pageres.cpp:932
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
inT32 length() const
Definition: rejctmap.h:236
int length() const
Definition: boxword.h:85
UNICHAR_ID BothSpaces(UNICHAR_ID id1, UNICHAR_ID id2)
Definition: pageres.cpp:1053
WERD_CHOICE * best_choice
Definition: pageres.h:219
tesseract::BoxWord * box_word
Definition: pageres.h:250
int length() const
Definition: ratngs.h:301
#define ASSERT_HOST(x)
Definition: errcode.h:84
REJMAP reject_map
Definition: pageres.h:271

◆ MergeAdjacentBlobs()

void WERD_RES::MergeAdjacentBlobs ( int  index)

Definition at line 968 of file pageres.cpp.

968  {
969  if (reject_map.length() == best_choice->length())
970  reject_map.remove_pos(index);
971  best_choice->remove_unichar_id(index + 1);
972  rebuild_word->MergeBlobs(index, index + 2);
973  box_word->MergeBoxes(index, index + 2);
974  if (index + 1 < best_state.length()) {
975  best_state[index] += best_state[index + 1];
976  best_state.remove(index + 1);
977  }
978 }
void remove_unichar_id(int index)
Definition: ratngs.h:482
TWERD * rebuild_word
Definition: pageres.h:244
inT32 length() const
Definition: rejctmap.h:236
void remove_pos(inT16 pos)
Definition: rejctmap.cpp:365
void remove(int index)
WERD_CHOICE * best_choice
Definition: pageres.h:219
void MergeBlobs(int start, int end)
Definition: blobs.cpp:892
tesseract::BoxWord * box_word
Definition: pageres.h:250
int length() const
Definition: ratngs.h:301
void MergeBoxes(int start, int end)
Definition: boxword.cpp:134
int length() const
Definition: genericvector.h:79
GenericVector< int > best_state
Definition: pageres.h:255
REJMAP reject_map
Definition: pageres.h:271

◆ operator=()

WERD_RES & WERD_RES::operator= ( const WERD_RES & source)

Definition at line 178 of file pageres.cpp.

178  {
179  this->ELIST_LINK::operator=(source);
180  Clear();
181  if (source.combination) {
182  word = new WERD;
183  *word = *(source.word); // deep copy
184  } else {
185  word = source.word; // pt to same word
186  }
187  if (source.bln_boxes != NULL)
188  bln_boxes = new tesseract::BoxWord(*source.bln_boxes);
189  if (source.chopped_word != NULL)
190  chopped_word = new TWERD(*source.chopped_word);
191  if (source.rebuild_word != NULL)
192  rebuild_word = new TWERD(*source.rebuild_word);
193  // TODO(rays) Do we ever need to copy the seam_array?
194  blob_row = source.blob_row;
195  denorm = source.denorm;
196  if (source.box_word != NULL)
197  box_word = new tesseract::BoxWord(*source.box_word);
198  best_state = source.best_state;
199  correct_text = source.correct_text;
200  blob_widths = source.blob_widths;
201  blob_gaps = source.blob_gaps;
202  // None of the uses of operator= require the ratings matrix to be copied,
203  // so don't as it would be really slow.
204 
205  // Copy the cooked choices.
206  WERD_CHOICE_IT wc_it(const_cast<WERD_CHOICE_LIST*>(&source.best_choices));
207  WERD_CHOICE_IT wc_dest_it(&best_choices);
208  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward()) {
209  const WERD_CHOICE *choice = wc_it.data();
210  wc_dest_it.add_after_then_move(new WERD_CHOICE(*choice));
211  }
212  if (!wc_dest_it.empty()) {
213  wc_dest_it.move_to_first();
214  best_choice = wc_dest_it.data();
215  } else {
216  best_choice = NULL;
217  }
218 
219  if (source.raw_choice != NULL) {
220  raw_choice = new WERD_CHOICE(*source.raw_choice);
221  } else {
222  raw_choice = NULL;
223  }
224  if (source.ep_choice != NULL) {
225  ep_choice = new WERD_CHOICE(*source.ep_choice);
226  } else {
227  ep_choice = NULL;
228  }
229  reject_map = source.reject_map;
230  combination = source.combination;
231  part_of_combo = source.part_of_combo;
232  CopySimpleFields(source);
233  if (source.blamer_bundle != NULL) {
234  blamer_bundle = new BlamerBundle(*(source.blamer_bundle));
235  }
236  return *this;
237 }
BOOL8 combination
Definition: pageres.h:318
TWERD * rebuild_word
Definition: pageres.h:244
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void operator=(const ELIST_LINK &)
Definition: elst.h:101
void Clear()
Definition: pageres.cpp:1131
WERD_CHOICE * best_choice
Definition: pageres.h:219
void CopySimpleFields(const WERD_RES &source)
Definition: pageres.cpp:241
BOOL8 part_of_combo
Definition: pageres.h:319
tesseract::BoxWord * box_word
Definition: pageres.h:250
Definition: werd.h:60
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
GenericVector< int > blob_gaps
Definition: pageres.h:208
ROW * blob_row
Definition: pageres.h:186
BlamerBundle * blamer_bundle
Definition: pageres.h:230
WERD_CHOICE * ep_choice
Definition: pageres.h:270
TWERD * chopped_word
Definition: pageres.h:201
Definition: blobs.h:395
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
GenericVector< STRING > correct_text
Definition: pageres.h:259
GenericVector< int > blob_widths
Definition: pageres.h:205
WERD * word
Definition: pageres.h:175
GenericVector< int > best_state
Definition: pageres.h:255
DENORM denorm
Definition: pageres.h:190
REJMAP reject_map
Definition: pageres.h:271

◆ PiecesAllNatural()

bool WERD_RES::PiecesAllNatural ( int  start,
int  count 
) const

Definition at line 1072 of file pageres.cpp.

1072  {
1073  // all seams must have no splits.
1074  for (int index = start; index < start + count - 1; ++index) {
1075  if (index >= 0 && index < seam_array.size()) {
1076  SEAM* seam = seam_array[index];
1077  if (seam != NULL && seam->HasAnySplits()) return false;
1078  }
1079  }
1080  return true;
1081 }
int count(LIST var_list)
Definition: oldlist.cpp:103
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
bool HasAnySplits() const
Definition: seam.h:67
int size() const
Definition: genericvector.h:72
Definition: seam.h:44

◆ PrintBestChoices()

void WERD_RES::PrintBestChoices ( ) const

Definition at line 709 of file pageres.cpp.

709  {
710  STRING alternates_str;
711  WERD_CHOICE_IT it(const_cast<WERD_CHOICE_LIST*>(&best_choices));
712  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
713  if (!it.at_first()) alternates_str += "\", \"";
714  alternates_str += it.data()->unichar_string();
715  }
716  tprintf("Alternates for \"%s\": {\"%s\"}\n",
717  best_choice->unichar_string().string(), alternates_str.string());
718 }
const STRING & unichar_string() const
Definition: ratngs.h:525
WERD_CHOICE * best_choice
Definition: pageres.h:219
const char * string() const
Definition: strngs.cpp:201
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
#define tprintf(...)
Definition: tprintf.h:31
Definition: strngs.h:44

◆ RawUTF8()

const char* WERD_RES::RawUTF8 ( int  blob_index) const
inline

Definition at line 358 of file pageres.h.

358  {
359  if (blob_index < 0 || blob_index >= raw_choice->length())
360  return NULL;
361  UNICHAR_ID id = raw_choice->unichar_id(blob_index);
362  if (id < 0 || id >= uch_set->size() || id == INVALID_UNICHAR_ID)
363  return NULL;
364  return uch_set->id_to_unichar(id);
365  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
WERD_CHOICE * raw_choice
Definition: pageres.h:224
int size() const
Definition: unicharset.h:297
int length() const
Definition: ratngs.h:301
const UNICHARSET * uch_set
Definition: pageres.h:192
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
int UNICHAR_ID
Definition: unichar.h:33

◆ RebuildBestState()

void WERD_RES::RebuildBestState ( )

Definition at line 800 of file pageres.cpp.

800  {
801  ASSERT_HOST(best_choice != NULL);
802  if (rebuild_word != NULL)
803  delete rebuild_word;
804  rebuild_word = new TWERD;
805  if (seam_array.empty())
807  best_state.truncate(0);
808  int start = 0;
809  for (int i = 0; i < best_choice->length(); ++i) {
810  int length = best_choice->state(i);
811  best_state.push_back(length);
812  if (length > 1) {
814  start + length - 1);
815  }
816  TBLOB* blob = chopped_word->blobs[start];
817  rebuild_word->blobs.push_back(new TBLOB(*blob));
818  if (length > 1) {
820  start + length - 1);
821  }
822  start += length;
823  }
824 }
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
TWERD * rebuild_word
Definition: pageres.h:244
int state(int index) const
Definition: ratngs.h:317
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
WERD_CHOICE * best_choice
Definition: pageres.h:219
int push_back(T object)
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
TWERD * chopped_word
Definition: pageres.h:201
Definition: blobs.h:395
void truncate(int size)
int length() const
Definition: ratngs.h:301
Definition: blobs.h:261
GenericVector< int > best_state
Definition: pageres.h:255
bool empty() const
Definition: genericvector.h:84
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReplaceBestChoice()

void WERD_RES::ReplaceBestChoice ( WERD_CHOICE * choice)

Definition at line 787 of file pageres.cpp.

787  {
788  best_choice = choice;
790  SetupBoxWord();
791  // Make up a fake reject map of the right length to keep the
792  // rejection pass happy.
796 }
BOOL8 tess_accepted
Definition: pageres.h:280
BOOL8 tess_would_adapt
Definition: pageres.h:281
void initialise(inT16 length)
Definition: rejctmap.cpp:318
WERD_CHOICE * best_choice
Definition: pageres.h:219
void SetScriptPositions()
Definition: pageres.cpp:853
void SetupBoxWord()
Definition: pageres.cpp:843
void RebuildBestState()
Definition: pageres.cpp:800
int length() const
Definition: genericvector.h:79
GenericVector< int > best_state
Definition: pageres.h:255
BOOL8 done
Definition: pageres.h:282
REJMAP reject_map
Definition: pageres.h:271

◆ SetAllScriptPositions()

void WERD_RES::SetAllScriptPositions ( tesseract::ScriptPos  position)

Definition at line 860 of file pageres.cpp.

860  {
862  WERD_CHOICE_IT wc_it(&best_choices);
863  for (wc_it.mark_cycle_pt(); !wc_it.cycled_list(); wc_it.forward())
864  wc_it.data()->SetAllScriptPositions(position);
865 }
WERD_CHOICE * raw_choice
Definition: pageres.h:224
void SetAllScriptPositions(tesseract::ScriptPos position)
Definition: ratngs.cpp:609
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227

◆ SetScriptPositions()

void WERD_RES::SetScriptPositions ( )

Definition at line 853 of file pageres.cpp.

853  {
855 }
WERD_CHOICE * best_choice
Definition: pageres.h:219
TWERD * chopped_word
Definition: pageres.h:201
void SetScriptPositions(bool small_caps, TWERD *word)
Definition: ratngs.cpp:528
bool small_caps
Definition: pageres.h:283

◆ SetupBasicsFromChoppedWord()

void WERD_RES::SetupBasicsFromChoppedWord ( const UNICHARSET & unicharset_in)

Definition at line 334 of file pageres.cpp.

334  {
339 }
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
void SetupBlobWidthsAndGaps()
Definition: pageres.cpp:391
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
TWERD * chopped_word
Definition: pageres.h:201
void ClearWordChoices()
Definition: pageres.cpp:1174
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59

◆ SetupBlamerBundle()

void WERD_RES::SetupBlamerBundle ( )

Definition at line 384 of file pageres.cpp.

384  {
385  if (blamer_bundle != NULL) {
387  }
388 }
void SetupNormTruthWord(const DENORM &denorm)
Definition: blamer.cpp:145
BlamerBundle * blamer_bundle
Definition: pageres.h:230
DENORM denorm
Definition: pageres.h:190

◆ SetupBlobWidthsAndGaps()

void WERD_RES::SetupBlobWidthsAndGaps ( )

Definition at line 391 of file pageres.cpp.

391  {
393  blob_gaps.truncate(0);
394  int num_blobs = chopped_word->NumBlobs();
395  for (int b = 0; b < num_blobs; ++b) {
396  TBLOB *blob = chopped_word->blobs[b];
397  TBOX box = blob->bounding_box();
398  blob_widths.push_back(box.width());
399  if (b + 1 < num_blobs) {
401  chopped_word->blobs[b + 1]->bounding_box().left() - box.right());
402  }
403  }
404 }
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
inT16 width() const
Definition: rect.h:111
TBOX bounding_box() const
Definition: blobs.cpp:482
int push_back(T object)
GenericVector< int > blob_gaps
Definition: pageres.h:208
TWERD * chopped_word
Definition: pageres.h:201
void truncate(int size)
int NumBlobs() const
Definition: blobs.h:425
Definition: blobs.h:261
GenericVector< int > blob_widths
Definition: pageres.h:205
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ SetupBoxWord()

void WERD_RES::SetupBoxWord ( )

Definition at line 843 of file pageres.cpp.

843  {
844  if (box_word != NULL)
845  delete box_word;
849 }
TWERD * rebuild_word
Definition: pageres.h:244
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:95
tesseract::BoxWord * box_word
Definition: pageres.h:250
const BLOCK * block() const
Definition: normalis.h:275
void ComputeBoundingBoxes()
Definition: blobs.cpp:875
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59
WERD * word
Definition: pageres.h:175
DENORM denorm
Definition: pageres.h:190

◆ SetupFake()

void WERD_RES::SetupFake ( const UNICHARSET & uch)

Definition at line 343 of file pageres.cpp.

343  {
344  ClearResults();
345  SetupWordScript(unicharset_in);
346  chopped_word = new TWERD;
347  rebuild_word = new TWERD;
350  int blob_count = word->cblob_list()->length();
351  if (blob_count > 0) {
352  BLOB_CHOICE** fake_choices = new BLOB_CHOICE*[blob_count];
353  // For non-text blocks, just pass any blobs through to the box_word
354  // and call the word failed with a fake classification.
355  C_BLOB_IT b_it(word->cblob_list());
356  int blob_id = 0;
357  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
358  TBOX box = b_it.data()->bounding_box();
359  box_word->InsertBox(box_word->length(), box);
360  fake_choices[blob_id++] = new BLOB_CHOICE;
361  }
362  FakeClassifyWord(blob_count, fake_choices);
363  delete [] fake_choices;
364  } else {
365  WERD_CHOICE* word = new WERD_CHOICE(&unicharset_in);
366  word->make_bad();
368  // Ownership of word is taken by *this WERD_RES in LogNewCookedChoice.
369  LogNewCookedChoice(1, false, word);
370  }
371  tess_failed = true;
372  done = true;
373 }
void ClearResults()
Definition: pageres.cpp:1141
TWERD * rebuild_word
Definition: pageres.h:244
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
int length() const
Definition: boxword.h:85
bool LogNewCookedChoice(int max_num_choices, bool debug, WERD_CHOICE *word_choice)
Definition: pageres.cpp:612
BOOL8 tess_failed
Definition: pageres.h:272
tesseract::BoxWord * box_word
Definition: pageres.h:250
tesseract::BoxWord * bln_boxes
Definition: pageres.h:184
TWERD * chopped_word
Definition: pageres.h:201
Definition: blobs.h:395
bool LogNewRawChoice(WERD_CHOICE *word_choice)
Definition: pageres.cpp:596
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:872
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:375
Definition: rect.h:30
WERD * word
Definition: pageres.h:175
BOOL8 done
Definition: pageres.h:282
C_BLOB_LIST * cblob_list()
Definition: werd.h:100

◆ SetupForRecognition()

bool WERD_RES::SetupForRecognition ( const UNICHARSET & unicharset_in,
tesseract::Tesseract * tesseract,
Pix *  pix,
int  norm_mode,
const TBOX * norm_box,
bool  numeric_mode,
bool  use_body_size,
bool  allow_detailed_fx,
ROW * row,
const BLOCK * block 
)

Definition at line 294 of file pageres.cpp.

301  {
302  tesseract::OcrEngineMode norm_mode_hint =
303  static_cast<tesseract::OcrEngineMode>(norm_mode);
304  tesseract = tess;
305  POLY_BLOCK* pb = block != NULL ? block->poly_block() : NULL;
306  if ((norm_mode_hint != tesseract::OEM_CUBE_ONLY &&
307  word->cblob_list()->empty()) || (pb != NULL && !pb->IsText())) {
308  // Empty words occur when all the blobs have been moved to the rej_blobs
309  // list, which seems to occur frequently in junk.
310  SetupFake(unicharset_in);
311  word->set_flag(W_REP_CHAR, false);
312  return false;
313  }
314  ClearResults();
315  SetupWordScript(unicharset_in);
316  chopped_word = TWERD::PolygonalCopy(allow_detailed_fx, word);
317  float word_xheight = use_body_size && row != NULL && row->body_size() > 0.0f
318  ? row->body_size() : x_height;
319  chopped_word->BLNormalize(block, row, pix, word->flag(W_INVERSE),
320  word_xheight, baseline_shift, numeric_mode,
321  norm_mode_hint, norm_box, &denorm);
322  blob_row = row;
323  SetupBasicsFromChoppedWord(unicharset_in);
325  int num_blobs = chopped_word->NumBlobs();
326  ratings = new MATRIX(num_blobs, kWordrecMaxNumJoinChunks);
327  tess_failed = false;
328  return true;
329 }
void ClearResults()
Definition: pageres.cpp:1141
void SetupBasicsFromChoppedWord(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:334
void BLNormalize(const BLOCK *block, const ROW *row, Pix *pix, bool inverse, float x_height, float baseline_shift, bool numeric_mode, tesseract::OcrEngineMode hint, const TBOX *norm_box, DENORM *word_denorm)
Definition: blobs.cpp:807
Definition: matrix.h:572
float x_height
Definition: pageres.h:295
Definition: werd.h:44
float body_size() const
Definition: ocrrow.h:70
BOOL8 tess_failed
Definition: pageres.h:272
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:343
BOOL8 flag(WERD_FLAGS mask) const
Definition: werd.h:128
MATRIX * ratings
Definition: pageres.h:215
ROW * blob_row
Definition: pageres.h:186
TWERD * chopped_word
Definition: pageres.h:201
void SetupBlamerBundle()
Definition: pageres.cpp:384
int NumBlobs() const
Definition: blobs.h:425
void SetupWordScript(const UNICHARSET &unicharset_in)
Definition: pageres.cpp:375
bool IsText() const
Definition: polyblk.h:52
const int kWordrecMaxNumJoinChunks
Definition: pageres.cpp:41
float baseline_shift
Definition: pageres.h:297
WERD * word
Definition: pageres.h:175
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
DENORM denorm
Definition: pageres.h:190
static TWERD * PolygonalCopy(bool allow_detailed_fx, WERD *src)
Definition: blobs.cpp:793

◆ SetupWordScript()

void WERD_RES::SetupWordScript ( const UNICHARSET & unicharset_in)

Definition at line 375 of file pageres.cpp.

375  {
376  uch_set = &uch;
377  int script = uch.default_sid();
378  word->set_script_id(script);
379  word->set_flag(W_SCRIPT_HAS_XHEIGHT, uch.script_has_xheight());
380  word->set_flag(W_SCRIPT_IS_LATIN, script == uch.latin_sid());
381 }
const UNICHARSET * uch_set
Definition: pageres.h:192
int default_sid() const
Definition: unicharset.h:839
WERD * word
Definition: pageres.h:175
void set_script_id(int id)
Definition: werd.h:113
void set_flag(WERD_FLAGS mask, BOOL8 value)
Definition: werd.h:129

◆ StatesAllValid()

bool WERD_RES::StatesAllValid ( )

Definition at line 449 of file pageres.cpp.

449  {
450  int ratings_dim = ratings->dimension();
451  if (raw_choice->TotalOfStates() != ratings_dim) {
452  tprintf("raw_choice has total of states = %d vs ratings dim of %d\n",
453  raw_choice->TotalOfStates(), ratings_dim);
454  return false;
455  }
456  WERD_CHOICE_IT it(&best_choices);
457  int index = 0;
458  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++index) {
459  WERD_CHOICE* choice = it.data();
460  if (choice->TotalOfStates() != ratings_dim) {
461  tprintf("Cooked #%d has total of states = %d vs ratings dim of %d\n",
462  index, choice->TotalOfStates(), ratings_dim);
463  return false;
464  }
465  }
466  return true;
467 }
int dimension() const
Definition: matrix.h:530
WERD_CHOICE * raw_choice
Definition: pageres.h:224
MATRIX * ratings
Definition: pageres.h:215
int TotalOfStates() const
Definition: ratngs.cpp:697
WERD_CHOICE_LIST best_choices
Definition: pageres.h:227
#define tprintf(...)
Definition: tprintf.h:31

◆ SymbolDirection()

UNICHARSET::Direction WERD_RES::SymbolDirection ( int  blob_index) const
inline

Definition at line 367 of file pageres.h.

367  {
368  if (best_choice == NULL ||
369  blob_index >= best_choice->length() ||
370  blob_index < 0)
371  return UNICHARSET::U_OTHER_NEUTRAL;
372  return uch_set->get_direction(best_choice->unichar_id(blob_index));
373  }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
Direction get_direction(UNICHAR_ID unichar_id) const
Definition: unicharset.h:638
WERD_CHOICE * best_choice
Definition: pageres.h:219
int length() const
Definition: ratngs.h:301
const UNICHARSET * uch_set
Definition: pageres.h:192

◆ UnicharsInReadingOrder()

bool WERD_RES::UnicharsInReadingOrder ( ) const
inline

Definition at line 409 of file pageres.h.

409  {
410  return best_choice->unichars_in_script_order();
411  }
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool unichars_in_script_order() const
Definition: ratngs.h:519

Member Data Documentation

◆ baseline_shift

float WERD_RES::baseline_shift

Definition at line 297 of file pageres.h.

◆ best_choice

WERD_CHOICE* WERD_RES::best_choice

Definition at line 219 of file pageres.h.

◆ best_choices

WERD_CHOICE_LIST WERD_RES::best_choices

Definition at line 227 of file pageres.h.

◆ best_state

GenericVector<int> WERD_RES::best_state

Definition at line 255 of file pageres.h.

◆ blamer_bundle

BlamerBundle* WERD_RES::blamer_bundle

Definition at line 230 of file pageres.h.

◆ bln_boxes

tesseract::BoxWord* WERD_RES::bln_boxes

Definition at line 184 of file pageres.h.

◆ blob_gaps

GenericVector<int> WERD_RES::blob_gaps

Definition at line 208 of file pageres.h.

◆ blob_row

ROW* WERD_RES::blob_row

Definition at line 186 of file pageres.h.

◆ blob_widths

GenericVector<int> WERD_RES::blob_widths

Definition at line 205 of file pageres.h.

◆ bold

inT8 WERD_RES::bold

Definition at line 286 of file pageres.h.

◆ box_word

tesseract::BoxWord* WERD_RES::box_word

Definition at line 250 of file pageres.h.

◆ caps_height

float WERD_RES::caps_height

Definition at line 296 of file pageres.h.

◆ chopped_word

TWERD* WERD_RES::chopped_word

Definition at line 201 of file pageres.h.

◆ combination

BOOL8 WERD_RES::combination

Definition at line 318 of file pageres.h.

◆ correct_text

GenericVector<STRING> WERD_RES::correct_text

Definition at line 259 of file pageres.h.

◆ denorm

DENORM WERD_RES::denorm

Definition at line 190 of file pageres.h.

◆ done

BOOL8 WERD_RES::done

Definition at line 282 of file pageres.h.

◆ ep_choice

WERD_CHOICE* WERD_RES::ep_choice

Definition at line 270 of file pageres.h.

◆ fontinfo

const FontInfo* WERD_RES::fontinfo

Definition at line 288 of file pageres.h.

◆ fontinfo2

const FontInfo* WERD_RES::fontinfo2

Definition at line 289 of file pageres.h.

◆ fontinfo_id2_count

inT8 WERD_RES::fontinfo_id2_count

Definition at line 291 of file pageres.h.

◆ fontinfo_id_count

inT8 WERD_RES::fontinfo_id_count

Definition at line 290 of file pageres.h.

◆ guessed_caps_ht

BOOL8 WERD_RES::guessed_caps_ht

Definition at line 293 of file pageres.h.

◆ guessed_x_ht

BOOL8 WERD_RES::guessed_x_ht

Definition at line 292 of file pageres.h.

◆ italic

inT8 WERD_RES::italic

Definition at line 285 of file pageres.h.

◆ odd_size

bool WERD_RES::odd_size

Definition at line 284 of file pageres.h.

◆ part_of_combo

BOOL8 WERD_RES::part_of_combo

Definition at line 319 of file pageres.h.

◆ ratings

MATRIX* WERD_RES::ratings

Definition at line 215 of file pageres.h.

◆ raw_choice

WERD_CHOICE* WERD_RES::raw_choice

Definition at line 224 of file pageres.h.

◆ rebuild_word

TWERD* WERD_RES::rebuild_word

Definition at line 244 of file pageres.h.

◆ reject_map

REJMAP WERD_RES::reject_map

Definition at line 271 of file pageres.h.

◆ reject_spaces

BOOL8 WERD_RES::reject_spaces

Definition at line 320 of file pageres.h.

◆ seam_array

GenericVector<SEAM*> WERD_RES::seam_array

Definition at line 203 of file pageres.h.

◆ small_caps

bool WERD_RES::small_caps

Definition at line 283 of file pageres.h.

◆ space_certainty

float WERD_RES::space_certainty

Definition at line 300 of file pageres.h.

◆ tess_accepted

BOOL8 WERD_RES::tess_accepted

Definition at line 280 of file pageres.h.

◆ tess_failed

BOOL8 WERD_RES::tess_failed

Definition at line 272 of file pageres.h.

◆ tess_would_adapt

BOOL8 WERD_RES::tess_would_adapt

Definition at line 281 of file pageres.h.

◆ tesseract

tesseract::Tesseract* WERD_RES::tesseract

Definition at line 266 of file pageres.h.

◆ uch_set

const UNICHARSET* WERD_RES::uch_set

Definition at line 192 of file pageres.h.

◆ unlv_crunch_mode

CRUNCH_MODE WERD_RES::unlv_crunch_mode

Definition at line 294 of file pageres.h.

◆ word

WERD* WERD_RES::word

Definition at line 175 of file pageres.h.

◆ x_height

float WERD_RES::x_height

Definition at line 295 of file pageres.h.


The documentation for this class was generated from the following files: