tesseract  3.05.02
tesseract::WordSizeModel Class Reference

#include <word_size_model.h>

Public Member Functions

 WordSizeModel (CharSet *, bool contextual)
 
virtual ~WordSizeModel ()
 
int Cost (CharSamp **samp_array, int samp_cnt) const
 
bool Save (string file_name)
 
int FontCount () const
 
const FontPairSizeInfo * FontInfo () const
 

Static Public Member Functions

static WordSizeModel * Create (const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
 
static double PairCost (int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info)
 
static int SizeCode (int cls_id, int start, int end)
 

Detailed Description

Definition at line 53 of file word_size_model.h.

Constructor & Destructor Documentation

◆ WordSizeModel()

tesseract::WordSizeModel::WordSizeModel ( CharSet *  char_set,
bool  contextual 
)

Definition at line 28 of file word_size_model.cpp.

28  {
29  char_set_ = char_set;
30  contextual_ = contextual;
31 }

◆ ~WordSizeModel()

tesseract::WordSizeModel::~WordSizeModel ( )
virtual

Definition at line 33 of file word_size_model.cpp.

33  {
34  for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
35  FontPairSizeInfo fnt_info = font_pair_size_models_[fnt];
36  delete []fnt_info.pair_size_info[0];
37  delete []fnt_info.pair_size_info;
38  }
39 }

Member Function Documentation

◆ Cost()

int tesseract::WordSizeModel::Cost ( CharSamp **  samp_array,
int  samp_cnt 
) const

Definition at line 195 of file word_size_model.cpp.

195  {
196  if (samp_cnt < 2) {
197  return 0;
198  }
199  double best_dist = static_cast<double>(WORST_COST);
200  int best_fnt = -1;
201  for (int fnt = 0; fnt < font_pair_size_models_.size(); fnt++) {
202  const FontPairSizeInfo *fnt_info = &font_pair_size_models_[fnt];
203  double mean_dist = 0;
204  int pair_cnt = 0;
205 
206  for (int smp_0 = 0; smp_0 < samp_cnt; smp_0++) {
207  int cls_0 = char_set_->ClassID(samp_array[smp_0]->StrLabel());
208  if (cls_0 < 1) {
209  continue;
210  }
211  // compute size code for samp 0 based on class id and position
212  int size_code_0;
213  if (contextual_) {
214  size_code_0 = SizeCode(cls_0,
215  samp_array[smp_0]->FirstChar() == 0 ? 0 : 1,
216  samp_array[smp_0]->LastChar() == 0 ? 0 : 1);
217  } else {
218  size_code_0 = cls_0;
219  }
220 
221  int char0_height = samp_array[smp_0]->Height();
222  int char0_width = samp_array[smp_0]->Width();
223  int char0_top = samp_array[smp_0]->Top();
224 
225  for (int smp_1 = smp_0 + 1; smp_1 < samp_cnt; smp_1++) {
226  int cls_1 = char_set_->ClassID(samp_array[smp_1]->StrLabel());
227  if (cls_1 < 1) {
228  continue;
229  }
230  // compute size code for samp 1 based on class id and position
231  int size_code_1;
232  if (contextual_) {
233  size_code_1 = SizeCode(cls_1,
234  samp_array[smp_1]->FirstChar() == 0 ? 0 : 1,
235  samp_array[smp_1]->LastChar() == 0 ? 0 : 1);
236  } else {
237  size_code_1 = cls_1;
238  }
239  double dist = PairCost(
240  char0_width, char0_height, char0_top, samp_array[smp_1]->Width(),
241  samp_array[smp_1]->Height(), samp_array[smp_1]->Top(),
242  fnt_info->pair_size_info[size_code_0][size_code_1]);
243  if (dist > 0) {
244  mean_dist += dist;
245  pair_cnt++;
246  }
247  } // smp_1
248  } // smp_0
249  if (pair_cnt == 0) {
250  continue;
251  }
252  mean_dist /= pair_cnt;
253  if (best_fnt == -1 || mean_dist < best_dist) {
254  best_dist = mean_dist;
255  best_fnt = fnt;
256  }
257  }
258  if (best_fnt == -1) {
259  return static_cast<int>(WORST_COST);
260  } else {
261  return static_cast<int>(best_dist);
262  }
263 }
static int SizeCode(int cls_id, int start, int end)
static double PairCost(int width_0, int height_0, int top_0, int width_1, int height_1, int top_1, const PairSizeInfo &pair_info)
#define WORST_COST
Definition: cube_const.h:30
int ClassID(const char_32 *str) const
Definition: char_set.h:54

◆ Create()

WordSizeModel * tesseract::WordSizeModel::Create ( const string &  data_file_path,
const string &  lang,
CharSet *  char_set,
bool  contextual 
)
static

Definition at line 41 of file word_size_model.cpp.

44  {
45  WordSizeModel *obj = new WordSizeModel(char_set, contextual);
46 
47  if (!obj->Init(data_file_path, lang)) {
48  delete obj;
49  return NULL;
50  }
51  return obj;
52 }
WordSizeModel(CharSet *, bool contextual)

◆ FontCount()

int tesseract::WordSizeModel::FontCount ( ) const
inline

Definition at line 72 of file word_size_model.h.

72  {
73  return font_pair_size_models_.size();
74  }

◆ FontInfo()

const FontPairSizeInfo* tesseract::WordSizeModel::FontInfo ( ) const
inline

Definition at line 75 of file word_size_model.h.

75  {
76  return &font_pair_size_models_[0];
77  }

◆ PairCost()

double tesseract::WordSizeModel::PairCost ( int  width_0,
int  height_0,
int  top_0,
int  width_1,
int  height_1,
int  top_1,
const PairSizeInfo &  pair_info 
)
static

Definition at line 265 of file word_size_model.cpp.

267  {
268  double scale_factor = static_cast<double>(pair_info.hgt_0) /
269  static_cast<double>(height_0);
270  double dist = 0.0;
271  if (scale_factor > 0) {
272  double norm_width_0 = width_0 * scale_factor;
273  double norm_width_1 = width_1 * scale_factor;
274  double norm_height_1 = height_1 * scale_factor;
275  double norm_delta_top = (top_1 - top_0) * scale_factor;
276 
277  // accumulate the distance between the model character and the
278  // predicted one on all dimensions of the pair
279  dist += fabs(pair_info.wid_0 - norm_width_0);
280  dist += fabs(pair_info.wid_1 - norm_width_1);
281  dist += fabs(pair_info.hgt_1 - norm_height_1);
282  dist += fabs(pair_info.delta_top - norm_delta_top);
283  }
284  return dist;
285 }

◆ Save()

bool tesseract::WordSizeModel::Save ( string  file_name)

◆ SizeCode()

static int tesseract::WordSizeModel::SizeCode ( int  cls_id,
int  start,
int  end 
)
inline static

Definition at line 80 of file word_size_model.h.

80  {
81  return (cls_id << 2) + (end << 1) + start;
82  }

The documentation for this class was generated from the following files: