96 int score1 = 0, score2 = 0;
99 for (
int f = 0; f < fonts_.
size(); ++f) {
100 if (fonts_[f].score > score1) {
102 fontinfo_id2_ = fontinfo_id_;
103 score1 = fonts_[f].score;
104 fontinfo_id_ = fonts_[f].fontinfo_id;
105 }
else if (fonts_[f].score > score2) {
106 score2 = fonts_[f].score;
107 fontinfo_id2_ = fonts_[f].fontinfo_id;
145 unichar_id_ = newunichar_id;
154 script_id_ = newscript_id;
157 matrix_cell_.
col = col;
158 matrix_cell_.
row = row;
180 tprintf(
"r%.2f c%.2f x[%g,%g]: %d %s",
182 min_xheight_, max_xheight_, unichar_id_,
183 (unicharset == NULL) ?
"" :
188 tprintf(
" script=%d, font1=%d, font2=%d, yshift=%g, classifier=%d\n",
189 script_id_, fontinfo_id_, fontinfo_id2_, yshift_, classifier_);
194 *
reinterpret_cast<const BLOB_CHOICE *
const *
>(p1);
196 *
reinterpret_cast<const BLOB_CHOICE *
const *
>(p2);
197 return (bc1->rating_ < bc2->rating_) ? -1 : 1;
274 static const char *permuter_name(
uinT8 permuter);
277 : unicharset_(unicharset) { this->init(8); }
279 : unicharset_(unicharset) { this->init(reserved); }
281 const char *src_lengths,
286 : unicharset_(&unicharset) {
287 this->init(src_string, src_lengths, src_rating,
288 src_certainty, src_permuter);
292 :
ELIST_LINK(word), unicharset_(word.unicharset_) {
293 this->init(word.
length());
305 return adjust_factor_;
308 adjust_factor_ = factor;
314 assert(index < length_);
315 return unichar_ids_[index];
318 return state_[index];
321 if (index < 0 || index >= length_)
323 return script_pos_[index];
332 return certainties_[index];
335 return min_x_height_;
338 return max_x_height_;
341 min_x_height_ = min_height;
342 max_x_height_ = max_height;
347 const char *permuter_name()
const;
351 BLOB_CHOICE_LIST* blob_choices(
int index,
MATRIX* ratings)
const;
358 assert(index < length_);
359 unichar_ids_[index] = unichar_id;
362 return dangerous_ambig_found_;
365 dangerous_ambig_found_ = value;
371 certainty_ = new_val;
388 reserved_, unichar_ids_);
390 reserved_, script_pos_);
394 reserved_, certainties_);
400 certainties_ =
new float[1];
407 inline void init(
int reserved) {
408 reserved_ = reserved;
412 state_ =
new int[reserved];
413 certainties_ =
new float[reserved];
421 adjust_factor_ = 1.0f;
424 min_x_height_ = 0.0f;
427 unichars_in_script_order_ =
false;
428 dangerous_ambig_found_ =
false;
436 void init(
const char *src_string,
const char *src_lengths,
437 float src_rating,
float src_certainty,
443 rating_ = kBadRating;
452 float rating,
float certainty) {
453 assert(reserved_ > length_);
455 this->set_unichar_id(unichar_id, blob_count,
456 rating, certainty, length_-1);
459 void append_unichar_id(
UNICHAR_ID unichar_id,
int blob_count,
460 float rating,
float certainty);
463 float rating,
float certainty,
int index) {
464 assert(index < length_);
465 unichar_ids_[index] = unichar_id;
466 state_[index] = blob_count;
467 certainties_[index] = certainty;
470 if (certainty < certainty_) {
471 certainty_ = certainty;
476 void set_blob_choice(
int index,
int blob_count,
479 bool contains_unichar_id(
UNICHAR_ID unichar_id)
const;
480 void remove_unichar_ids(
int index,
int num);
483 this->remove_unichar_ids(index, 1);
485 bool has_rtl_unichar_id()
const;
486 void reverse_and_mirror_unichar_ids();
491 void punct_stripped(
int *start_core,
int *end_core)
const;
496 void GetNonSuperscriptSpan(
int *start,
int *end)
const;
500 WERD_CHOICE shallow_copy(
int start,
int end)
const;
502 void string_and_lengths(
STRING *word_str,
STRING *word_lengths_str)
const;
505 for (
int i = 0; i < length_; ++i) {
506 word_str += unicharset_->debug_str(unichar_ids_[i]);
516 return unichars_in_script_order_ = in_script_order;
520 return unichars_in_script_order_;
526 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
527 return unichar_string_;
533 this->string_and_lengths(&unichar_string_, &unichar_lengths_);
534 return unichar_lengths_;
542 void SetScriptPositions(
bool small_caps,
TWERD* word);
550 const TBOX& blob_box,
556 int GetTopScriptID()
const;
559 void UpdateStateForSplit(
int blob_position);
562 int TotalOfStates()
const;
565 void print(
const char *msg)
const;
567 void print_state(
const char *msg)
const;
571 void DisplaySegmentation(
TWERD* word);
602 float adjust_factor_;
616 bool unichars_in_script_order_;
618 bool dangerous_ambig_found_;
622 mutable STRING unichar_string_;
623 mutable STRING unichar_lengths_;
638 BLOB_CHOICE_LIST *ratings,
void set_xgap_after(inT16 gap)
WERD_CHOICE(const UNICHARSET *unicharset, int reserved)
void remove_unichar_id(int index)
const UNICHAR_ID * unichar_ids() const
float certainty(int index) const
const STRING & unichar_string() const
inT16 xgap_before() const
void set_rating(float new_val)
void print(const UNICHARSET *unicharset) const
UNICHAR_ID unichar_id(int index) const
#define ELISTIZEH(CLASSNAME)
float adjust_factor() const
void operator=(const ELIST_LINK &)
int state(int index) const
static T * double_the_size_memcpy(int current_size, T *data)
void set_unichar_id(UNICHAR_ID unichar_id, int index)
void set_rating(float newrat)
ICOORD & operator+=(ICOORD &op1, const ICOORD &op2)
bool EqualIgnoringCaseAndTerminalPunct(const WERD_CHOICE &word1, const WERD_CHOICE &word2)
void set_script(int newscript_id)
tesseract::ScriptPos BlobPosition(int index) const
void print_ratings_list(const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
float min_xheight() const
void set_permuter(uinT8 perm)
void set_fonts(const GenericVector< tesseract::ScoredFont > &fonts)
void double_the_size()
Make more space in unichar_id_ and fragment_lengths_ arrays.
static BLOB_CHOICE * deep_copy(const BLOB_CHOICE *src)
const GenericVector< tesseract::ScoredFont > & fonts() const
bool set_unichars_in_script_order(bool in_script_order)
WERD_CHOICE(const WERD_CHOICE &word)
void set_adjust_factor(float factor)
void set_dangerous_ambig_found_(bool value)
const UNICHARSET * unicharset() const
bool PosAndSizeAgree(const BLOB_CHOICE &other, float x_height, bool debug) const
UNICHAR_ID unichar_id() const
bool unichars_in_script_order() const
BlobChoiceClassifier classifier() const
BLOB_CHOICE * FindMatchingChoice(UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
const char * string() const
bool dangerous_ambig_found() const
void make_bad()
Set the fields in this choice to be default (bad) values.
void remove_last_unichar_id()
void set_certainty(float new_val)
inT16 fontinfo_id2() const
void set_certainty(float newrat)
static int SortByRating(const void *p1, const void *p2)
WERD_CHOICE(const char *src_string, const char *src_lengths, float src_rating, float src_certainty, uinT8 src_permuter, const UNICHARSET &unicharset)
STRING debug_str(UNICHAR_ID id) const
float min_x_height() const
void set_unichar_id(UNICHAR_ID newunichar_id)
void set_matrix_cell(int col, int row)
float max_x_height() const
bool IsClassified() const
static const float kBadRating
const MATRIX_COORD & matrix_cell()
const STRING & unichar_lengths() const
inT16 fontinfo_id() const
void set_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty, int index)
WERD_CHOICE(const UNICHARSET *unicharset)
void set_classifier(BlobChoiceClassifier classifier)
void set_xgap_before(inT16 gap)
float max_xheight() const
const STRING debug_string() const
void set_x_heights(float min_height, float max_height)
void append_unichar_id_space_allocated(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)
const char * ScriptPosToString(enum ScriptPos script_pos)