tesseract  3.05.02
tesseract::CharSamp Class Reference

#include <char_samp.h>

Inheritance diagram for tesseract::CharSamp:
tesseract::Bmp8

Public Member Functions

 CharSamp ()
 
 CharSamp (int wid, int hgt)
 
 CharSamp (int left, int top, int wid, int hgt)
 
 ~CharSamp ()
 
unsigned short Left () const
 
unsigned short Right () const
 
unsigned short Top () const
 
unsigned short Bottom () const
 
unsigned short Page () const
 
unsigned short NormTop () const
 
unsigned short NormBottom () const
 
unsigned short NormAspectRatio () const
 
unsigned short FirstChar () const
 
unsigned short LastChar () const
 
char_32 Label () const
 
char_32 * StrLabel () const
 
string stringLabel () const
 
void SetLeft (unsigned short left)
 
void SetTop (unsigned short top)
 
void SetPage (unsigned short page)
 
void SetLabel (char_32 label)
 
void SetLabel (const char_32 *label32)
 
void SetLabel (string str)
 
void SetNormTop (unsigned short norm_top)
 
void SetNormBottom (unsigned short norm_bottom)
 
void SetNormAspectRatio (unsigned short norm_aspect_ratio)
 
void SetFirstChar (unsigned short first_char)
 
void SetLastChar (unsigned short last_char)
 
bool Save2CharDumpFile (FILE *fp) const
 
CharSamp * Crop ()
 
ConComp ** Segment (int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
 
CharSamp * Scale (int wid, int hgt, bool isotropic=true)
 
CharSamp * Clone () const
 
bool ComputeFeatures (int conv_grid_size, float *features)
 
int LabelLen () const
 
- Public Member Functions inherited from tesseract::Bmp8
 Bmp8 (unsigned short wid, unsigned short hgt)
 
 ~Bmp8 ()
 
bool Clear ()
 
unsigned short Width () const
 
unsigned short Stride () const
 
unsigned short Height () const
 
unsigned char * RawData () const
 
bool ScaleFrom (Bmp8 *bmp, bool isotropic=true)
 
bool Deslant ()
 
bool HorizontalDeslant (double *deslant_angle)
 
bool IsIdentical (Bmp8 *pBmp) const
 
ConComp ** FindConComps (int *concomp_cnt, int min_size) const
 
float ForegroundRatio () const
 
float MeanHorizontalHistogramEntropy () const
 
int * HorizontalHistogram () const
 

Static Public Member Functions

static CharSamp * FromCharDumpFile (CachedFile *fp)
 
static CharSamp * FromCharDumpFile (FILE *fp)
 
static CharSamp * FromCharDumpFile (unsigned char **raw_data)
 
static CharSamp * FromRawData (int left, int top, int wid, int hgt, unsigned char *data)
 
static CharSamp * FromConComps (ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
 
static int AuxFeatureCnt ()
 
static int LabelLen (const char_32 *label32)
 
- Static Public Member Functions inherited from tesseract::Bmp8
static Bmp8 * FromCharDumpFile (CachedFile *fp)
 
static Bmp8 * FromCharDumpFile (FILE *fp)
 

Additional Inherited Members

- Protected Member Functions inherited from tesseract::Bmp8
bool LoadFromCharDumpFile (CachedFile *fp)
 
bool LoadFromCharDumpFile (FILE *fp)
 
bool LoadFromCharDumpFile (unsigned char **raw_data)
 
bool LoadFromRawData (unsigned char *data)
 
bool SaveBmp2CharDumpFile (FILE *fp) const
 
bool IsBlankColumn (int x) const
 
bool IsBlankRow (int y) const
 
void Crop (int *xst_src, int *yst_src, int *wid, int *hgt)
 
void Copy (int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
 
- Protected Attributes inherited from tesseract::Bmp8
unsigned short wid_
 
unsigned short hgt_
 
unsigned char ** line_buff_
 
- Static Protected Attributes inherited from tesseract::Bmp8
static const int kConCompAllocChunk = 16
 
static const int kDeslantAngleCount
 

Detailed Description

Definition at line 39 of file char_samp.h.

Constructor & Destructor Documentation

◆ CharSamp() [1/3]

tesseract::CharSamp::CharSamp ( )

Definition at line 29 of file char_samp.cpp.

30  : Bmp8(0, 0) {
31  left_ = 0;
32  top_ = 0;
33  label32_ = NULL;
34  page_ = -1;
35 }
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:38

◆ CharSamp() [2/3]

tesseract::CharSamp::CharSamp ( int  wid,
int  hgt 
)

Definition at line 37 of file char_samp.cpp.

38  : Bmp8(wid, hgt) {
39  left_ = 0;
40  top_ = 0;
41  label32_ = NULL;
42  page_ = -1;
43 }
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:38

◆ CharSamp() [3/3]

tesseract::CharSamp::CharSamp ( int  left,
int  top,
int  wid,
int  hgt 
)

Definition at line 45 of file char_samp.cpp.

46  : Bmp8(wid, hgt)
47  , left_(left)
48  , top_(top) {
49  label32_ = NULL;
50  page_ = -1;
51 }
Bmp8(unsigned short wid, unsigned short hgt)
Definition: bmp_8.cpp:38

◆ ~CharSamp()

tesseract::CharSamp::~CharSamp ( )

Definition at line 53 of file char_samp.cpp.

53  {
54  if (label32_ != NULL) {
55  delete []label32_;
56  label32_ = NULL;
57  }
58 }

Member Function Documentation

◆ AuxFeatureCnt()

static int tesseract::CharSamp::AuxFeatureCnt ( )
inline static

Definition at line 130 of file char_samp.h.

130 { return (5); }

◆ Bottom()

unsigned short tesseract::CharSamp::Bottom ( ) const
inline

Definition at line 49 of file char_samp.h.

49 { return top_ + hgt_; }
unsigned short hgt_
Definition: bmp_8.h:96

◆ Clone()

CharSamp * tesseract::CharSamp::Clone ( ) const

Definition at line 542 of file char_samp.cpp.

542  {
543  // create the cropped char samp
544  CharSamp *samp = new CharSamp(left_, top_, wid_, hgt_);
545  samp->SetLabel(label32_);
546  samp->SetFirstChar(first_char_);
547  samp->SetLastChar(last_char_);
548  samp->SetNormTop(norm_top_);
549  samp->SetNormBottom(norm_bottom_);
550  samp->SetNormAspectRatio(norm_aspect_ratio_);
551  // copy the bitmap to the cropped img
552  Copy(0, 0, wid_, hgt_, samp);
553  return samp;
554 }
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:545
unsigned short wid_
Definition: bmp_8.h:95
unsigned short hgt_
Definition: bmp_8.h:96

◆ ComputeFeatures()

bool tesseract::CharSamp::ComputeFeatures ( int  conv_grid_size,
float *  features 
)

Definition at line 617 of file char_samp.cpp.

617  {
618  // Create a scaled BMP
619  CharSamp *scaled_bmp = Scale(conv_grid_size, conv_grid_size);
620  if (!scaled_bmp) {
621  return false;
622  }
623  // prepare input
624  unsigned char *buff = scaled_bmp->RawData();
625  // bitmap features
626  int input;
627  int bmp_size = conv_grid_size * conv_grid_size;
628  for (input = 0; input < bmp_size; input++) {
629  features[input] = 255.0f - (1.0f * buff[input]);
630  }
631  // word context features
632  features[input++] = FirstChar();
633  features[input++] = LastChar();
634  features[input++] = NormTop();
635  features[input++] = NormBottom();
636  features[input++] = NormAspectRatio();
637  delete scaled_bmp;
638  return true;
639 }
unsigned short LastChar() const
Definition: char_samp.h:55
CharSamp * Scale(int wid, int hgt, bool isotropic=true)
Definition: char_samp.cpp:247
unsigned short NormAspectRatio() const
Definition: char_samp.h:53
unsigned short NormTop() const
Definition: char_samp.h:51
unsigned short FirstChar() const
Definition: char_samp.h:54
unsigned short NormBottom() const
Definition: char_samp.h:52

◆ Crop()

CharSamp * tesseract::CharSamp::Crop ( )

Definition at line 338 of file char_samp.cpp.

338  {
339  // get the dimensions of the cropped img
340  int cropped_left = 0;
341  int cropped_top = 0;
342  int cropped_wid = wid_;
343  int cropped_hgt = hgt_;
344  Bmp8::Crop(&cropped_left, &cropped_top,
345  &cropped_wid, &cropped_hgt);
346 
347  if (cropped_wid == 0 || cropped_hgt == 0) {
348  return NULL;
349  }
350  // create the cropped char samp
351  CharSamp *cropped_samp = new CharSamp(left_ + cropped_left,
352  top_ + cropped_top,
353  cropped_wid, cropped_hgt);
354  cropped_samp->SetLabel(label32_);
355  cropped_samp->SetFirstChar(first_char_);
356  cropped_samp->SetLastChar(last_char_);
357  // the following 3 fields may/should be reset by the calling function
358  // using context information, i.e., location of character box
359  // w.r.t. the word bounding box
360  cropped_samp->SetNormAspectRatio(255 *
361  cropped_wid / (cropped_wid + cropped_hgt));
362  cropped_samp->SetNormTop(0);
363  cropped_samp->SetNormBottom(255);
364 
365  // copy the bitmap to the cropped img
366  Copy(cropped_left, cropped_top, cropped_wid, cropped_hgt, cropped_samp);
367  return cropped_samp;
368 }
void Copy(int x, int y, int wid, int hgt, Bmp8 *bmp_dest) const
Definition: bmp_8.cpp:545
void Crop(int *xst_src, int *yst_src, int *wid, int *hgt)
Definition: bmp_8.cpp:318
unsigned short wid_
Definition: bmp_8.h:95
unsigned short hgt_
Definition: bmp_8.h:96

◆ FirstChar()

unsigned short tesseract::CharSamp::FirstChar ( ) const
inline

Definition at line 54 of file char_samp.h.

54 { return first_char_; }

◆ FromCharDumpFile() [1/3]

CharSamp * tesseract::CharSamp::FromCharDumpFile ( CachedFile *  fp)
static

Definition at line 82 of file char_samp.cpp.

82  {
83  unsigned short left;
84  unsigned short top;
85  unsigned short page;
86  unsigned short first_char;
87  unsigned short last_char;
88  unsigned short norm_top;
89  unsigned short norm_bottom;
90  unsigned short norm_aspect_ratio;
91  unsigned int val32;
92 
93  char_32 *label32;
94 
95  // read and check 32 bit marker
96  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
97  return NULL;
98  }
99  if (val32 != 0xabd0fefe) {
100  return NULL;
101  }
102  // read label length,
103  if (fp->Read(&val32, sizeof(val32)) != sizeof(val32)) {
104  return NULL;
105  }
106  // the label is not null terminated in the file
107  if (val32 > 0 && val32 < MAX_UINT32) {
108  label32 = new char_32[val32 + 1];
109  // read label
110  if (fp->Read(label32, val32 * sizeof(*label32)) !=
111  (val32 * sizeof(*label32))) {
112  delete [] label32;
113  return NULL;
114  }
115  // null terminate
116  label32[val32] = 0;
117  } else {
118  label32 = NULL;
119  }
120  // read coordinates
121  if (fp->Read(&page, sizeof(page)) != sizeof(page)) {
122  delete [] label32;
123  return NULL;
124  }
125  if (fp->Read(&left, sizeof(left)) != sizeof(left)) {
126  delete [] label32;
127  return NULL;
128  }
129  if (fp->Read(&top, sizeof(top)) != sizeof(top)) {
130  delete [] label32;
131  return NULL;
132  }
133  if (fp->Read(&first_char, sizeof(first_char)) != sizeof(first_char)) {
134  delete [] label32;
135  return NULL;
136  }
137  if (fp->Read(&last_char, sizeof(last_char)) != sizeof(last_char)) {
138  delete [] label32;
139  return NULL;
140  }
141  if (fp->Read(&norm_top, sizeof(norm_top)) != sizeof(norm_top)) {
142  delete [] label32;
143  return NULL;
144  }
145  if (fp->Read(&norm_bottom, sizeof(norm_bottom)) != sizeof(norm_bottom)) {
146  delete [] label32;
147  return NULL;
148  }
149  if (fp->Read(&norm_aspect_ratio, sizeof(norm_aspect_ratio)) !=
150  sizeof(norm_aspect_ratio)) {
151  delete [] label32;
152  return NULL;
153  }
154  // create the object
155  CharSamp *char_samp = new CharSamp();
156  // init
157  char_samp->label32_ = label32;
158  char_samp->page_ = page;
159  char_samp->left_ = left;
160  char_samp->top_ = top;
161  char_samp->first_char_ = first_char;
162  char_samp->last_char_ = last_char;
163  char_samp->norm_top_ = norm_top;
164  char_samp->norm_bottom_ = norm_bottom;
165  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
166  // load the Bmp8 part
167  if (char_samp->LoadFromCharDumpFile(fp) == false) {
168  delete char_samp;
169  return NULL;
170  }
171  return char_samp;
172 }
signed int char_32
Definition: string_32.h:40
#define MAX_UINT32
Definition: host.h:56

◆ FromCharDumpFile() [2/3]

CharSamp * tesseract::CharSamp::FromCharDumpFile ( FILE *  fp)
static

Definition at line 175 of file char_samp.cpp.

175  {
176  unsigned short left;
177  unsigned short top;
178  unsigned short page;
179  unsigned short first_char;
180  unsigned short last_char;
181  unsigned short norm_top;
182  unsigned short norm_bottom;
183  unsigned short norm_aspect_ratio;
184  unsigned int val32;
185  char_32 *label32;
186 
187  // read and check 32 bit marker
188  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
189  return NULL;
190  }
191  if (val32 != 0xabd0fefe) {
192  return NULL;
193  }
194  // read label length,
195  if (fread(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
196  return NULL;
197  }
198  // the label is not null terminated in the file
199  if (val32 > 0 && val32 < MAX_UINT32) {
200  label32 = new char_32[val32 + 1];
201  // read label
202  if (fread(label32, 1, val32 * sizeof(*label32), fp) !=
203  (val32 * sizeof(*label32))) {
204  delete [] label32;
205  return NULL;
206  }
207  // null terminate
208  label32[val32] = 0;
209  } else {
210  label32 = NULL;
211  }
212  // read coordinates
213  if (fread(&page, 1, sizeof(page), fp) != sizeof(page) ||
214  fread(&left, 1, sizeof(left), fp) != sizeof(left) ||
215  fread(&top, 1, sizeof(top), fp) != sizeof(top) ||
216  fread(&first_char, 1, sizeof(first_char), fp) != sizeof(first_char) ||
217  fread(&last_char, 1, sizeof(last_char), fp) != sizeof(last_char) ||
218  fread(&norm_top, 1, sizeof(norm_top), fp) != sizeof(norm_top) ||
219  fread(&norm_bottom, 1, sizeof(norm_bottom), fp) != sizeof(norm_bottom) ||
220  fread(&norm_aspect_ratio, 1, sizeof(norm_aspect_ratio), fp) !=
221  sizeof(norm_aspect_ratio)) {
222  delete [] label32;
223  return NULL;
224  }
225  // create the object
226  CharSamp *char_samp = new CharSamp();
227  // init
228  char_samp->label32_ = label32;
229  char_samp->page_ = page;
230  char_samp->left_ = left;
231  char_samp->top_ = top;
232  char_samp->first_char_ = first_char;
233  char_samp->last_char_ = last_char;
234  char_samp->norm_top_ = norm_top;
235  char_samp->norm_bottom_ = norm_bottom;
236  char_samp->norm_aspect_ratio_ = norm_aspect_ratio;
237  // load the Bmp8 part
238  if (char_samp->LoadFromCharDumpFile(fp) == false) {
239  delete char_samp; // It owns label32.
240  return NULL;
241  }
242  return char_samp;
243 }
signed int char_32
Definition: string_32.h:40
#define MAX_UINT32
Definition: host.h:56

◆ FromCharDumpFile() [3/3]

CharSamp * tesseract::CharSamp::FromCharDumpFile ( unsigned char **  raw_data)
static

Definition at line 557 of file char_samp.cpp.

557  {
558  unsigned int val32;
559  char_32 *label32;
560  unsigned char *raw_data = *raw_data_ptr;
561 
562  // read and check 32 bit marker
563  memcpy(&val32, raw_data, sizeof(val32));
564  raw_data += sizeof(val32);
565  if (val32 != 0xabd0fefe) {
566  return NULL;
567  }
568  // read label length,
569  memcpy(&val32, raw_data, sizeof(val32));
570  raw_data += sizeof(val32);
571  // the label is not null terminated in the file
572  if (val32 > 0 && val32 < MAX_UINT32) {
573  label32 = new char_32[val32 + 1];
574  // read label
575  memcpy(label32, raw_data, val32 * sizeof(*label32));
576  raw_data += (val32 * sizeof(*label32));
577  // null terminate
578  label32[val32] = 0;
579  } else {
580  label32 = NULL;
581  }
582 
583  // create the object
584  CharSamp *char_samp = new CharSamp();
585 
586  // read coordinates
587  char_samp->label32_ = label32;
588  memcpy(&char_samp->page_, raw_data, sizeof(char_samp->page_));
589  raw_data += sizeof(char_samp->page_);
590  memcpy(&char_samp->left_, raw_data, sizeof(char_samp->left_));
591  raw_data += sizeof(char_samp->left_);
592  memcpy(&char_samp->top_, raw_data, sizeof(char_samp->top_));
593  raw_data += sizeof(char_samp->top_);
594  memcpy(&char_samp->first_char_, raw_data, sizeof(char_samp->first_char_));
595  raw_data += sizeof(char_samp->first_char_);
596  memcpy(&char_samp->last_char_, raw_data, sizeof(char_samp->last_char_));
597  raw_data += sizeof(char_samp->last_char_);
598  memcpy(&char_samp->norm_top_, raw_data, sizeof(char_samp->norm_top_));
599  raw_data += sizeof(char_samp->norm_top_);
600  memcpy(&char_samp->norm_bottom_, raw_data, sizeof(char_samp->norm_bottom_));
601  raw_data += sizeof(char_samp->norm_bottom_);
602  memcpy(&char_samp->norm_aspect_ratio_, raw_data,
603  sizeof(char_samp->norm_aspect_ratio_));
604  raw_data += sizeof(char_samp->norm_aspect_ratio_);
605 
606  // load the Bmp8 part
607  if (char_samp->LoadFromCharDumpFile(&raw_data) == false) {
608  delete char_samp;
609  return NULL;
610  }
611 
612  (*raw_data_ptr) = raw_data;
613  return char_samp;
614 }
signed int char_32
Definition: string_32.h:40
#define MAX_UINT32
Definition: host.h:56

◆ FromConComps()

CharSamp * tesseract::CharSamp::FromConComps ( ConComp **  concomp_array,
int  strt_concomp,
int  seg_flags_size,
int *  seg_flags,
bool *  left_most,
bool *  right_most,
int  word_hgt 
)
static

Definition at line 439 of file char_samp.cpp.

442  {
443  int concomp;
444  int end_concomp;
445  int concomp_cnt = 0;
446  end_concomp = strt_concomp + seg_flags_size;
447  // determine ID range
448  bool once = false;
449  int min_id = -1;
450  int max_id = -1;
451  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
452  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
453  if (!once) {
454  min_id = concomp_array[concomp]->ID();
455  max_id = concomp_array[concomp]->ID();
456  once = true;
457  } else {
458  UpdateRange(concomp_array[concomp]->ID(), &min_id, &max_id);
459  }
460  concomp_cnt++;
461  }
462  }
463  if (concomp_cnt < 1 || !once || min_id == -1 || max_id == -1) {
464  return NULL;
465  }
466  // alloc memo for computing leftmost and right most attributes
467  int id_cnt = max_id - min_id + 1;
468  bool *id_exist = new bool[id_cnt];
469  bool *left_most_exist = new bool[id_cnt];
470  bool *right_most_exist = new bool[id_cnt];
471  memset(id_exist, 0, id_cnt * sizeof(*id_exist));
472  memset(left_most_exist, 0, id_cnt * sizeof(*left_most_exist));
473  memset(right_most_exist, 0, id_cnt * sizeof(*right_most_exist));
474  // find the dimensions of the charsamp
475  once = false;
476  int left = -1;
477  int right = -1;
478  int top = -1;
479  int bottom = -1;
480  int unq_ids = 0;
481  int unq_left_most = 0;
482  int unq_right_most = 0;
483  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
484  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
485  if (!once) {
486  left = concomp_array[concomp]->Left();
487  right = concomp_array[concomp]->Right();
488  top = concomp_array[concomp]->Top();
489  bottom = concomp_array[concomp]->Bottom();
490  once = true;
491  } else {
492  UpdateRange(concomp_array[concomp]->Left(),
493  concomp_array[concomp]->Right(), &left, &right);
494  UpdateRange(concomp_array[concomp]->Top(),
495  concomp_array[concomp]->Bottom(), &top, &bottom);
496  }
497  // count unq ids, unq left most and right mosts ids
498  int concomp_id = concomp_array[concomp]->ID() - min_id;
499  if (!id_exist[concomp_id]) {
500  id_exist[concomp_id] = true;
501  unq_ids++;
502  }
503  if (concomp_array[concomp]->LeftMost()) {
504  if (left_most_exist[concomp_id] == false) {
505  left_most_exist[concomp_id] = true;
506  unq_left_most++;
507  }
508  }
509  if (concomp_array[concomp]->RightMost()) {
510  if (right_most_exist[concomp_id] == false) {
511  right_most_exist[concomp_id] = true;
512  unq_right_most++;
513  }
514  }
515  }
516  }
517  delete []id_exist;
518  delete []left_most_exist;
519  delete []right_most_exist;
520  if (!once || left == -1 || top == -1 || right == -1 || bottom == -1) {
521  return NULL;
522  }
523  (*left_most) = (unq_left_most >= unq_ids);
524  (*right_most) = (unq_right_most >= unq_ids);
525  // create the char sample object
526  CharSamp *samp = new CharSamp(left, top, right - left + 1, bottom - top + 1);
527 
528  // set the foreground pixels
529  for (concomp = strt_concomp; concomp < end_concomp; concomp++) {
530  if (!seg_flags || seg_flags[concomp - strt_concomp] != 0) {
531  ConCompPt *pt_ptr = concomp_array[concomp]->Head();
532  while (pt_ptr) {
533  samp->line_buff_[pt_ptr->y() - top][pt_ptr->x() - left] = 0;
534  pt_ptr = pt_ptr->Next();
535  }
536  }
537  }
538  return samp;
539 }
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:125
unsigned short Right() const
Definition: char_samp.h:47
unsigned short Top() const
Definition: char_samp.h:48
unsigned short Left() const
Definition: char_samp.h:46
unsigned short Bottom() const
Definition: char_samp.h:49

◆ FromRawData()

CharSamp * tesseract::CharSamp::FromRawData ( int  left,
int  top,
int  wid,
int  hgt,
unsigned char *  data 
)
static

Definition at line 266 of file char_samp.cpp.

267  {
268  // create the object
269  CharSamp *char_samp = new CharSamp(left, top, wid, hgt);
270  if (char_samp->LoadFromRawData(data) == false) {
271  delete char_samp;
272  return NULL;
273  }
274  return char_samp;
275 }

◆ Label()

char_32 tesseract::CharSamp::Label ( ) const
inline

Definition at line 56 of file char_samp.h.

56  {
57  if (label32_ == NULL || LabelLen() != 1) {
58  return 0;
59  }
60  return label32_[0];
61  }
int LabelLen() const
Definition: char_samp.h:132

◆ LabelLen() [1/2]

int tesseract::CharSamp::LabelLen ( ) const
inline

Definition at line 132 of file char_samp.h.

132 { return LabelLen(label32_); }
int LabelLen() const
Definition: char_samp.h:132

◆ LabelLen() [2/2]

static int tesseract::CharSamp::LabelLen ( const char_32 *  label32)
inline static

Definition at line 133 of file char_samp.h.

133  {
134  if (label32 == NULL) {
135  return 0;
136  }
137  int len = 0;
138  while (label32[++len] != 0);
139  return len;
140  }

◆ LastChar()

unsigned short tesseract::CharSamp::LastChar ( ) const
inline

Definition at line 55 of file char_samp.h.

55 { return last_char_; }

◆ Left()

unsigned short tesseract::CharSamp::Left ( ) const
inline

Definition at line 46 of file char_samp.h.

46 { return left_; }

◆ NormAspectRatio()

unsigned short tesseract::CharSamp::NormAspectRatio ( ) const
inline

Definition at line 53 of file char_samp.h.

53 { return norm_aspect_ratio_; }

◆ NormBottom()

unsigned short tesseract::CharSamp::NormBottom ( ) const
inline

Definition at line 52 of file char_samp.h.

52 { return norm_bottom_; }

◆ NormTop()

unsigned short tesseract::CharSamp::NormTop ( ) const
inline

Definition at line 51 of file char_samp.h.

51 { return norm_top_; }

◆ Page()

unsigned short tesseract::CharSamp::Page ( ) const
inline

Definition at line 50 of file char_samp.h.

50 { return page_; }

◆ Right()

unsigned short tesseract::CharSamp::Right ( ) const
inline

Definition at line 47 of file char_samp.h.

47 { return left_ + wid_; }
unsigned short wid_
Definition: bmp_8.h:95

◆ Save2CharDumpFile()

bool tesseract::CharSamp::Save2CharDumpFile ( FILE *  fp) const

Definition at line 278 of file char_samp.cpp.

278  {
279  unsigned int val32;
280  // write and check 32 bit marker
281  val32 = 0xabd0fefe;
282  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
283  return false;
284  }
285  // write label length
286  val32 = (label32_ == NULL) ? 0 : LabelLen(label32_);
287  if (fwrite(&val32, 1, sizeof(val32), fp) != sizeof(val32)) {
288  return false;
289  }
290  // write label
291  if (label32_ != NULL) {
292  if (fwrite(label32_, 1, val32 * sizeof(*label32_), fp) !=
293  (val32 * sizeof(*label32_))) {
294  return false;
295  }
296  }
297  // write coordinates
298  if (fwrite(&page_, 1, sizeof(page_), fp) != sizeof(page_)) {
299  return false;
300  }
301  if (fwrite(&left_, 1, sizeof(left_), fp) != sizeof(left_)) {
302  return false;
303  }
304  if (fwrite(&top_, 1, sizeof(top_), fp) != sizeof(top_)) {
305  return false;
306  }
307  if (fwrite(&first_char_, 1, sizeof(first_char_), fp) !=
308  sizeof(first_char_)) {
309  return false;
310  }
311  if (fwrite(&last_char_, 1, sizeof(last_char_), fp) != sizeof(last_char_)) {
312  return false;
313  }
314  if (fwrite(&norm_top_, 1, sizeof(norm_top_), fp) != sizeof(norm_top_)) {
315  return false;
316  }
317  if (fwrite(&norm_bottom_, 1, sizeof(norm_bottom_), fp) !=
318  sizeof(norm_bottom_)) {
319  return false;
320  }
321  if (fwrite(&norm_aspect_ratio_, 1, sizeof(norm_aspect_ratio_), fp) !=
322  sizeof(norm_aspect_ratio_)) {
323  return false;
324  }
325  if (SaveBmp2CharDumpFile(fp) == false) {
326  return false;
327  }
328  return true;
329 }
int LabelLen() const
Definition: char_samp.h:132
bool SaveBmp2CharDumpFile(FILE *fp) const
Definition: bmp_8.cpp:485

◆ Scale()

CharSamp * tesseract::CharSamp::Scale ( int  wid,
int  hgt,
bool  isotropic = true 
)

Definition at line 247 of file char_samp.cpp.

247  {
248  CharSamp *scaled_samp = new CharSamp(wid, hgt);
249  if (scaled_samp->ScaleFrom(this, isotropic) == false) {
250  delete scaled_samp;
251  return NULL;
252  }
253  scaled_samp->left_ = left_;
254  scaled_samp->top_ = top_;
255  scaled_samp->page_ = page_;
256  scaled_samp->SetLabel(label32_);
257  scaled_samp->first_char_ = first_char_;
258  scaled_samp->last_char_ = last_char_;
259  scaled_samp->norm_top_ = norm_top_;
260  scaled_samp->norm_bottom_ = norm_bottom_;
261  scaled_samp->norm_aspect_ratio_ = norm_aspect_ratio_;
262  return scaled_samp;
263 }

◆ Segment()

ConComp ** tesseract::CharSamp::Segment ( int *  seg_cnt,
bool  right_2_left,
int  max_hist_wnd,
int  min_con_comp_size 
) const

Definition at line 372 of file char_samp.cpp.

373  {
374  // init
375  (*segment_cnt) = 0;
376  int concomp_cnt = 0;
377  int seg_cnt = 0;
378  // find the concomps of the image
379  ConComp **concomp_array = FindConComps(&concomp_cnt, min_con_comp_size);
380  if (concomp_cnt <= 0 || !concomp_array) {
381  if (concomp_array)
382  delete []concomp_array;
383  return NULL;
384  }
385  ConComp **seg_array = NULL;
386  // segment each concomp further using vertical histogram
387  for (int concomp = 0; concomp < concomp_cnt; concomp++) {
388  int concomp_seg_cnt = 0;
389  // segment the concomp
390  ConComp **concomp_seg_array = NULL;
391  ConComp **concomp_alloc_seg =
392  concomp_array[concomp]->Segment(max_hist_wnd, &concomp_seg_cnt);
393  // no segments, add the whole concomp
394  if (concomp_alloc_seg == NULL) {
395  concomp_seg_cnt = 1;
396  concomp_seg_array = concomp_array + concomp;
397  } else {
398  // delete the original concomp, we no longer need it
399  concomp_seg_array = concomp_alloc_seg;
400  delete concomp_array[concomp];
401  }
402  // add the resulting segments
403  for (int seg_idx = 0; seg_idx < concomp_seg_cnt; seg_idx++) {
404  // too small of a segment: ignore
405  if (concomp_seg_array[seg_idx]->Width() < 2 &&
406  concomp_seg_array[seg_idx]->Height() < 2) {
407  delete concomp_seg_array[seg_idx];
408  } else {
409  // add the new segment
410  // extend the segment array
411  if ((seg_cnt % kConCompAllocChunk) == 0) {
412  ConComp **temp_segm_array =
413  new ConComp *[seg_cnt + kConCompAllocChunk];
414  if (seg_cnt > 0) {
415  memcpy(temp_segm_array, seg_array, seg_cnt * sizeof(*seg_array));
416  delete []seg_array;
417  }
418  seg_array = temp_segm_array;
419  }
420  seg_array[seg_cnt++] = concomp_seg_array[seg_idx];
421  }
422  } // segment
423  if (concomp_alloc_seg != NULL) {
424  delete []concomp_alloc_seg;
425  }
426  } // concomp
427  delete []concomp_array;
428 
429  // sort the concomps from Left2Right or Right2Left, based on the reading order
430  if (seg_cnt > 0 && seg_array != NULL) {
431  qsort(seg_array, seg_cnt, sizeof(*seg_array), right_2_left ?
432  ConComp::Right2LeftComparer : ConComp::Left2RightComparer);
433  }
434  (*segment_cnt) = seg_cnt;
435  return seg_array;
436 }
static int Right2LeftComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:82
unsigned short Height() const
Definition: bmp_8.h:50
unsigned short Width() const
Definition: bmp_8.h:48
static const int kConCompAllocChunk
Definition: bmp_8.h:100
static int Left2RightComparer(const void *comp1, const void *comp2)
Definition: con_comp.h:73
ConComp ** FindConComps(int *concomp_cnt, int min_size) const
Definition: bmp_8.cpp:572

◆ SetFirstChar()

void tesseract::CharSamp::SetFirstChar ( unsigned short  first_char)
inline

Definition at line 96 of file char_samp.h.

96  {
97  first_char_ = first_char;
98  }

◆ SetLabel() [1/3]

void tesseract::CharSamp::SetLabel ( char_32  label)
inline

Definition at line 68 of file char_samp.h.

68  {
69  delete []label32_;
70  label32_ = new char_32[2];
71  label32_[0] = label;
72  label32_[1] = 0;
73  }
signed int char_32
Definition: string_32.h:40

◆ SetLabel() [2/3]

void tesseract::CharSamp::SetLabel ( const char_32 *  label32)
inline

Definition at line 74 of file char_samp.h.

74  {
75  delete []label32_;
76  label32_ = NULL;
77  if (label32 != NULL) {
78  // remove any byte order marks if any
79  if (label32[0] == 0xfeff) {
80  label32++;
81  }
82  int len = LabelLen(label32);
83  label32_ = new char_32[len + 1];
84  memcpy(label32_, label32, len * sizeof(*label32));
85  label32_[len] = 0;
86  }
87  }
int LabelLen() const
Definition: char_samp.h:132
signed int char_32
Definition: string_32.h:40

◆ SetLabel() [3/3]

void tesseract::CharSamp::SetLabel ( string  str)

Definition at line 71 of file char_samp.cpp.

71  {
72  if (label32_ != NULL) {
73  delete []label32_;
74  label32_ = NULL;
75  }
76  string_32 str32;
77  CubeUtils::UTF8ToUTF32(str.c_str(), &str32);
78  SetLabel(reinterpret_cast<const char_32 *>(str32.c_str()));
79 }
void SetLabel(char_32 label)
Definition: char_samp.h:68
basic_string< char_32 > string_32
Definition: string_32.h:41
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
Definition: cube_utils.cpp:256

◆ SetLastChar()

void tesseract::CharSamp::SetLastChar ( unsigned short  last_char)
inline

Definition at line 99 of file char_samp.h.

99  {
100  last_char_ = last_char;
101  }

◆ SetLeft()

void tesseract::CharSamp::SetLeft ( unsigned short  left)
inline

Definition at line 65 of file char_samp.h.

65 { left_ = left; }

◆ SetNormAspectRatio()

void tesseract::CharSamp::SetNormAspectRatio ( unsigned short  norm_aspect_ratio)
inline

Definition at line 93 of file char_samp.h.

93  {
94  norm_aspect_ratio_ = norm_aspect_ratio;
95  }

◆ SetNormBottom()

void tesseract::CharSamp::SetNormBottom ( unsigned short  norm_bottom)
inline

Definition at line 90 of file char_samp.h.

90  {
91  norm_bottom_ = norm_bottom;
92  }

◆ SetNormTop()

void tesseract::CharSamp::SetNormTop ( unsigned short  norm_top)
inline

Definition at line 89 of file char_samp.h.

89 { norm_top_ = norm_top; }

◆ SetPage()

void tesseract::CharSamp::SetPage ( unsigned short  page)
inline

Definition at line 67 of file char_samp.h.

67 { page_ = page; }

◆ SetTop()

void tesseract::CharSamp::SetTop ( unsigned short  top)
inline

Definition at line 66 of file char_samp.h.

66 { top_ = top; }

◆ stringLabel()

string tesseract::CharSamp::stringLabel ( ) const

Definition at line 61 of file char_samp.cpp.

61  {
62  string str = "";
63  if (label32_ != NULL) {
64  string_32 str32(label32_);
65  CubeUtils::UTF32ToUTF8(str32.c_str(), &str);
66  }
67  return str;
68 }
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:272
basic_string< char_32 > string_32
Definition: string_32.h:41

◆ StrLabel()

char_32* tesseract::CharSamp::StrLabel ( ) const
inline

Definition at line 62 of file char_samp.h.

62 { return label32_; }

◆ Top()

unsigned short tesseract::CharSamp::Top ( ) const
inline

Definition at line 48 of file char_samp.h.

48 { return top_; }

The documentation for this class was generated from the following files: