tesseract  3.05.02
ratngs.cpp File Reference
#include "ratngs.h"
#include "blobs.h"
#include "callcpp.h"
#include "genericvector.h"
#include "matrix.h"
#include "normalis.h"
#include "unicharset.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Functions

 ELISTIZE (BLOB_CHOICE)
 
 ELISTIZE (WERD_CHOICE)
 
BLOB_CHOICE * FindMatchingChoice (UNICHAR_ID char_id, BLOB_CHOICE_LIST *bc_list)
 
const char * tesseract::ScriptPosToString (enum ScriptPos script_pos)
 
bool EqualIgnoringCaseAndTerminalPunct (const WERD_CHOICE &word1, const WERD_CHOICE &word2)
 
void print_ratings_list (const char *msg, BLOB_CHOICE_LIST *ratings, const UNICHARSET &current_unicharset)
 

Variables

const int kMinSubscriptOffset = 20
 
const int kMinSuperscriptOffset = 20
 
const int kMaxDropCapBottom = -128
 
const double kMaxOverlapDenominator = 0.125
 
const double kMinXHeightMatch = 0.5
 
const double kMaxBaselineDrift = 0.0625
 

Function Documentation

◆ ELISTIZE() [1/2]

ELISTIZE ( BLOB_CHOICE  )

◆ ELISTIZE() [2/2]

ELISTIZE ( WERD_CHOICE  )

◆ EqualIgnoringCaseAndTerminalPunct()

bool EqualIgnoringCaseAndTerminalPunct ( const WERD_CHOICE &  word1,
const WERD_CHOICE &  word2 
)

Definition at line 791 of file ratngs.cpp.

792  {
793  const UNICHARSET *uchset = word1.unicharset();
794  if (word2.unicharset() != uchset) return false;
795  int w1start, w1end;
796  word1.punct_stripped(&w1start, &w1end);
797  int w2start, w2end;
798  word2.punct_stripped(&w2start, &w2end);
799  if (w1end - w1start != w2end - w2start) return false;
800  for (int i = 0; i < w1end - w1start; i++) {
801  if (uchset->to_lower(word1.unichar_id(w1start + i)) !=
802  uchset->to_lower(word2.unichar_id(w2start + i))) {
803  return false;
804  }
805  }
806  return true;
807 }
UNICHAR_ID unichar_id(int index) const
Definition: ratngs.h:313
const UNICHARSET * unicharset() const
Definition: ratngs.h:298
void punct_stripped(int *start_core, int *end_core) const
Definition: ratngs.cpp:361
UNICHAR_ID to_lower(UNICHAR_ID unichar_id) const
Definition: unicharset.h:652

◆ FindMatchingChoice()

BLOB_CHOICE* FindMatchingChoice ( UNICHAR_ID  char_id,
BLOB_CHOICE_LIST *  bc_list 
)

Definition at line 160 of file ratngs.cpp.

161  {
162  // Find the corresponding best BLOB_CHOICE.
163  BLOB_CHOICE_IT choice_it(bc_list);
164  for (choice_it.mark_cycle_pt(); !choice_it.cycled_list();
165  choice_it.forward()) {
166  BLOB_CHOICE* choice = choice_it.data();
167  if (choice->unichar_id() == char_id) {
168  return choice;
169  }
170  }
171  return NULL;
172 }
UNICHAR_ID unichar_id() const
Definition: ratngs.h:76

◆ print_ratings_list()

void print_ratings_list ( const char *  msg,
BLOB_CHOICE_LIST *  ratings,
const UNICHARSET &  current_unicharset 
)

print_ratings_list

Send all the ratings out to the logfile.

Parameters
msg - intro message
ratings - list of ratings
current_unicharset - unicharset that can be used for id-to-unichar conversion

Definition at line 819 of file ratngs.cpp.

821  {
822  if (ratings->length() == 0) {
823  tprintf("%s:<none>\n", msg);
824  return;
825  }
826  if (*msg != '\0') {
827  tprintf("%s\n", msg);
828  }
829  BLOB_CHOICE_IT c_it;
830  c_it.set_to_list(ratings);
831  for (c_it.mark_cycle_pt(); !c_it.cycled_list(); c_it.forward()) {
832  c_it.data()->print(&current_unicharset);
833  if (!c_it.at_last()) tprintf("\n");
834  }
835  tprintf("\n");
836  fflush(stdout);
837 }
#define tprintf(...)
Definition: tprintf.h:31

Variable Documentation

◆ kMaxBaselineDrift

const double kMaxBaselineDrift = 0.0625

Definition at line 53 of file ratngs.cpp.

◆ kMaxDropCapBottom

const int kMaxDropCapBottom = -128

Definition at line 45 of file ratngs.cpp.

◆ kMaxOverlapDenominator

const double kMaxOverlapDenominator = 0.125

Definition at line 47 of file ratngs.cpp.

◆ kMinSubscriptOffset

const int kMinSubscriptOffset = 20

Definition at line 41 of file ratngs.cpp.

◆ kMinSuperscriptOffset

const int kMinSuperscriptOffset = 20

Definition at line 43 of file ratngs.cpp.

◆ kMinXHeightMatch

const double kMinXHeightMatch = 0.5

Definition at line 50 of file ratngs.cpp.