// Fragment only: the enclosing function header is not visible in this excerpt
// (presumably the CharSet constructor -- TODO confirm).
// Puts both pointer members into a known empty state.
30 class_strings_ = NULL;
31 unicharset_map_ = NULL;
// Zero the hash-bin counters.
// NOTE(review): assumes hash_bin_size_ is an array member (not a pointer), so
// that sizeof covers every bin -- confirm against the class declaration.
35 memset(hash_bin_size_, 0,
sizeof(hash_bin_size_));
// Fragment only: header and closing braces are not visible in this excerpt
// (presumably the CharSet destructor -- TODO confirm).
// Frees every non-NULL entry of class_strings_, then the pointer array itself,
// then the unicharset_map_ array.
39 if (class_strings_ != NULL) {
40 for (
int cls = 0; cls < class_cnt_; cls++) {
41 if (class_strings_[cls] != NULL) {
// Scalar delete: each entry is a single heap-allocated string object.
42 delete class_strings_[cls];
// Array delete for the pointer table; the member is reset afterwards.
45 delete []class_strings_;
46 class_strings_ = NULL;
// No NULL guard here: delete[] on a null pointer is a no-op.
48 delete []unicharset_map_;
// Fragment of CharSet::Create (name taken from the error messages below).
// The signature and several statements are not visible in this excerpt.
59 bool cube_unicharset_exists;
// The flag is assigned inside the condition itself; the expression that
// produces its value is not visible here.
60 if (!(cube_unicharset_exists =
// Reported when neither a cube nor a tesseract unicharset could be found.
63 fprintf(stderr,
"Cube ERROR (CharSet::Create): could not find " 64 "either cube or tesseract unicharset\n");
// Second error report; the remainder of its message is not visible here.
69 fprintf(stderr,
"Cube ERROR (CharSet::Create): could not load " 78 if (cube_unicharset_exists) {
// Cube unicharset present: the supported-char-list result is AND-ed into
// loaded, and unicharset_ is pointed at the object's own cube_unicharset_.
81 loaded = loaded && char_set->LoadSupportedCharList(
83 char_set->unicharset_ = &char_set->cube_unicharset_;
// Apparently the alternative branch (the else line is not visible): the list
// is loaded with a NULL unicharset argument and unicharset_ is set to
// tess_unicharset.
85 loaded = char_set->LoadSupportedCharList(charset_fp, NULL);
86 char_set->unicharset_ = tess_unicharset;
// Mark the object as initialised.
93 char_set->init_ =
true;
// Reads the supported character list from fp: a first line holding the class
// count, then one line per class. Each class string is stored in
// class_strings_ and its class id is appended to a bin of hash_bins_.
// When tess_unicharset is non-NULL, unicharset_map_ is also allocated and
// filled with one id per class.
// Returns bool; the return statements themselves are not visible in this
// excerpt, which omits a number of original lines.
98 bool CharSet::LoadSupportedCharList(FILE *fp,
UNICHARSET *tess_unicharset) {
// Start from empty hash bins.
104 memset(hash_bin_size_, 0,
sizeof(hash_bin_size_));
// The first line of the file is the class count.
106 if (fgets(str_line,
sizeof(str_line), fp) == NULL) {
// NOTE(review): the message names CharSet::InitMemory although it is emitted
// from LoadSupportedCharList.
107 fprintf(stderr,
"Cube ERROR (CharSet::InitMemory): could not " 108 "read char count.\n");
// atoi yields 0 for non-numeric text, which the range check below rejects.
111 class_cnt_ = atoi(str_line);
112 if (class_cnt_ < 2) {
113 fprintf(stderr,
"Cube ERROR (CharSet::InitMemory): invalid " 114 "class count: %d\n", class_cnt_);
// One string pointer per class.
// NOTE(review): this new[] expression does not zero the elements, while the
// cleanup code tests entries against NULL -- confirm the array is cleared in
// lines not shown here.
118 class_strings_ =
new string_32*[class_cnt_];
// The class-id map is only allocated when a tesseract unicharset is supplied.
120 if (tess_unicharset) {
121 unicharset_map_ =
new int[class_cnt_];
// One input line per class id.
125 for (
int class_id = 0; class_id < class_cnt_; class_id++) {
127 if (fgets(str_line,
sizeof(str_line), fp) == NULL) {
// NOTE(review): the message names CharSet::ReadAndHashStrings although it is
// emitted from LoadSupportedCharList.
128 fprintf(stderr,
"Cube ERROR (CharSet::ReadAndHashStrings): " 129 "could not read class string with class_id=%d.\n", class_id);
// Locate the first space in the line; how p is used is not visible here.
133 char *p = strchr(str_line,
' ');
// A line whose text is the literal word NULL is replaced by a single space.
139 if (strcmp(str_line,
"NULL") == 0) {
140 strcpy(str_line,
" ");
// Keep a heap copy of str32 for this class.
// NOTE(review): str32 is presumably the UTF-32 form of str_line; its
// construction is not visible here.
143 class_strings_[class_id] =
new string_32(str32);
// Hash the string and refuse to add to a bin that already holds kMaxHashSize
// entries.
146 int hash_val = Hash(reinterpret_cast<const char_32 *>(str32.c_str()));
147 if (hash_bin_size_[hash_val] >= kMaxHashSize) {
// The second statement on the next line appends class_id to the bin and
// increments the bin counter.
148 fprintf(stderr,
"Cube ERROR (CharSet::LoadSupportedCharList): hash " 152 hash_bins_[hash_val][hash_bin_size_[hash_val]++] = class_id;
// Optional mapping step; the computation of tess_id and the handling of
// INVALID_UNICHAR_ID are not visible in this excerpt.
154 if (tess_unicharset != NULL) {
157 if (tess_id == INVALID_UNICHAR_ID) {
162 unicharset_map_[class_id] = tess_id;
FILE * GetDataFilePtr() const
static CharSet * Create(TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
bool load_from_file(const char *const filename, bool skip_fragments)
void TESS_API unichar_insert(const char *const unichar_repr)
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
basic_string< char_32 > string_32
bool SeekToStart(TessdataType tessdata_type)
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)