tesseract  3.05.02
tesseract::CharSet Class Reference

#include <char_set.h>

Public Member Functions

 CharSet ()
 
 ~CharSet ()
 
bool SharedUnicharset ()
 
int ClassID (const char_32 *str) const
 
int ClassID (char_32 ch) const
 
int UnicharID (const char_32 *str) const
 
int UnicharID (char_32 ch) const
 
const char_32 * ClassString (int class_id) const
 
int ClassCount () const
 
UNICHARSET * InternalUnicharset ()
 

Static Public Member Functions

static CharSet * Create (TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
 

Detailed Description

Definition at line 42 of file char_set.h.

Constructor & Destructor Documentation

◆ CharSet()

tesseract::CharSet::CharSet ( )

Definition at line 28 of file char_set.cpp.

28  {
29  class_cnt_ = 0;
30  class_strings_ = NULL;
31  unicharset_map_ = NULL;
32  init_ = false;
33 
34  // init hash table
35  memset(hash_bin_size_, 0, sizeof(hash_bin_size_));
36 }

◆ ~CharSet()

tesseract::CharSet::~CharSet ( )

Definition at line 38 of file char_set.cpp.

38  {
39  if (class_strings_ != NULL) {
40  for (int cls = 0; cls < class_cnt_; cls++) {
41  if (class_strings_[cls] != NULL) {
42  delete class_strings_[cls];
43  }
44  }
45  delete []class_strings_;
46  class_strings_ = NULL;
47  }
48  delete []unicharset_map_;
49 }

Member Function Documentation

◆ ClassCount()

int tesseract::CharSet::ClassCount ( ) const
inline

Definition at line 111 of file char_set.h.

111 { return class_cnt_; }

◆ ClassID() [1/2]

int tesseract::CharSet::ClassID ( const char_32 * str) const
inline

Definition at line 54 of file char_set.h.

54  {
55  int hash_val = Hash(str);
56  if (hash_bin_size_[hash_val] == 0)
57  return -1;
58  for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
59  if (class_strings_[hash_bins_[hash_val][bin]]->compare(str) == 0)
60  return hash_bins_[hash_val][bin];
61  }
62  return -1;
63  }

◆ ClassID() [2/2]

int tesseract::CharSet::ClassID ( char_32  ch) const
inline

Definition at line 65 of file char_set.h.

65  {
66  int hash_val = Hash(ch);
67  if (hash_bin_size_[hash_val] == 0)
68  return -1;
69  for (int bin = 0; bin < hash_bin_size_[hash_val]; bin++) {
70  if ((*class_strings_[hash_bins_[hash_val][bin]])[0] == ch &&
71  class_strings_[hash_bins_[hash_val][bin]]->length() == 1) {
72  return hash_bins_[hash_val][bin];
73  }
74  }
75  return -1;
76  }

◆ ClassString()

const char_32* tesseract::CharSet::ClassString ( int  class_id) const
inline

Definition at line 104 of file char_set.h.

104  {
105  if (class_id < 0 || class_id >= class_cnt_) {
106  return NULL;
107  }
108  return reinterpret_cast<const char_32 *>(class_strings_[class_id]->c_str());
109  }
signed int char_32
Definition: string_32.h:40

◆ Create()

CharSet * tesseract::CharSet::Create ( TessdataManager * tessdata_manager,
UNICHARSET * tess_unicharset
)
static

Definition at line 54 of file char_set.cpp.

55  {
56  CharSet *char_set = new CharSet();
57 
58  // First look for Cube's unicharset; if not there, use tesseract's
59  bool cube_unicharset_exists;
60  if (!(cube_unicharset_exists =
61  tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET)) &&
62  !tessdata_manager->SeekToStart(TESSDATA_UNICHARSET)) {
63  fprintf(stderr, "Cube ERROR (CharSet::Create): could not find "
64  "either cube or tesseract unicharset\n");
65  return NULL;
66  }
67  FILE *charset_fp = tessdata_manager->GetDataFilePtr();
68  if (!charset_fp) {
69  fprintf(stderr, "Cube ERROR (CharSet::Create): could not load "
70  "a unicharset\n");
71  return NULL;
72  }
73 
74  // If we found a cube unicharset separate from tesseract's, load it and
75  // map its unichars to tesseract's; if only one unicharset exists,
76  // just load it.
77  bool loaded;
78  if (cube_unicharset_exists) {
79  char_set->cube_unicharset_.load_from_file(charset_fp);
80  loaded = tessdata_manager->SeekToStart(TESSDATA_CUBE_UNICHARSET);
81  loaded = loaded && char_set->LoadSupportedCharList(
82  tessdata_manager->GetDataFilePtr(), tess_unicharset);
83  char_set->unicharset_ = &char_set->cube_unicharset_;
84  } else {
85  loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
86  char_set->unicharset_ = tess_unicharset;
87  }
88  if (!loaded) {
89  delete char_set;
90  return NULL;
91  }
92 
93  char_set->init_ = true;
94  return char_set;
95 }

◆ InternalUnicharset()

UNICHARSET* tesseract::CharSet::InternalUnicharset ( )
inline

Definition at line 121 of file char_set.h.

121 { return unicharset_; }

◆ SharedUnicharset()

bool tesseract::CharSet::SharedUnicharset ( )
inline

Definition at line 48 of file char_set.h.

48 { return (unicharset_map_ == NULL); }

◆ UnicharID() [1/2]

int tesseract::CharSet::UnicharID ( const char_32 * str) const
inline

Definition at line 80 of file char_set.h.

80  {
81  int class_id = ClassID(str);
82  if (class_id == INVALID_UNICHAR_ID)
83  return INVALID_UNICHAR_ID;
84  int unichar_id;
85  if (unicharset_map_)
86  unichar_id = unicharset_map_[class_id];
87  else
88  unichar_id = class_id;
89  return unichar_id;
90  }
int ClassID(const char_32 *str) const
Definition: char_set.h:54

◆ UnicharID() [2/2]

int tesseract::CharSet::UnicharID ( char_32  ch) const
inline

Definition at line 92 of file char_set.h.

92  {
93  int class_id = ClassID(ch);
94  if (class_id == INVALID_UNICHAR_ID)
95  return INVALID_UNICHAR_ID;
96  int unichar_id;
97  if (unicharset_map_)
98  unichar_id = unicharset_map_[class_id];
99  else
100  unichar_id = class_id;
101  return unichar_id;
102  }
int ClassID(char_32 ch) const
Definition: char_set.h:65

The documentation for this class was generated from the following files: