tesseract  3.05.02
word_altlist.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: word_altlist.cpp
3  * Description: Implementation of the Word Alternate List Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "word_altlist.h"
21 
22 namespace tesseract {
// NOTE(review): the declarator line for this body is missing from this
// listing; the initializer below suggests the WordAltList(int max_alt)
// constructor -- confirm against the header.
// Forwards max_alt to the AltList base and leaves the word array unallocated;
// Insert() allocates it when it finds the pointer still NULL.
24  : AltList(max_alt) {
25  word_alt_ = NULL;
26 }
27 
// NOTE(review): the declarator line for this body is missing from this
// listing; presumably the WordAltList destructor -- confirm.
// Frees each stored alternate string (only the first alt_cnt_ slots are
// visited), then the pointer array itself, and resets word_alt_ to NULL.
// alt_cost_ and alt_tag_ are not released here; presumably the AltList base
// takes care of them -- verify.
29  if (word_alt_ != NULL) {
30  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
31  if (word_alt_[alt_idx] != NULL) {
32  delete []word_alt_[alt_idx];
33  }
34  }
35  delete []word_alt_;
36  word_alt_ = NULL;
37  }
38 }
39 
43 bool WordAltList::Insert(char_32 *word_str, int cost, void *tag) {
44  if (word_alt_ == NULL || alt_cost_ == NULL) {
45  word_alt_ = new char_32*[max_alt_];
46  alt_cost_ = new int[max_alt_];
47  alt_tag_ = new void *[max_alt_];
48  memset(alt_tag_, 0, max_alt_ * sizeof(*alt_tag_));
49  } else {
50  // check if alt already exists
51  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
52  if (CubeUtils::StrCmp(word_str, word_alt_[alt_idx]) == 0) {
53  // update the cost if we have a lower one
54  if (cost < alt_cost_[alt_idx]) {
55  alt_cost_[alt_idx] = cost;
56  alt_tag_[alt_idx] = tag;
57  }
58  return true;
59  }
60  }
61  }
62 
63  // determine length of alternate
64  int len = CubeUtils::StrLen(word_str);
65 
66  word_alt_[alt_cnt_] = new char_32[len + 1];
67 
68  if (len > 0) {
69  memcpy(word_alt_[alt_cnt_], word_str, len * sizeof(*word_str));
70  }
71 
72  word_alt_[alt_cnt_][len] = 0;
73  alt_cost_[alt_cnt_] = cost;
74  alt_tag_[alt_cnt_] = tag;
75 
76  alt_cnt_++;
77 
78  return true;
79 }
80 
// NOTE(review): the declarator line for this body is missing from this
// listing; presumably WordAltList::Sort() -- confirm.
// Reorders the alternates in place so costs are ascending: each position
// alt_idx is compared with every later position and exchanged whenever the
// later one is cheaper (a quadratic number of comparisons in alt_cnt_).
// word_alt_, alt_cost_ and alt_tag_ are parallel arrays, so every exchange is
// applied to all three to keep string, cost and tag together.
85  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
86  for (int alt = alt_idx + 1; alt < alt_cnt_; alt++) {
87  if (alt_cost_[alt_idx] > alt_cost_[alt]) {
88  char_32 *pchTemp = word_alt_[alt_idx];
89  word_alt_[alt_idx] = word_alt_[alt];
90  word_alt_[alt] = pchTemp;
91 
92  int temp = alt_cost_[alt_idx];
93  alt_cost_[alt_idx] = alt_cost_[alt];
94  alt_cost_[alt] = temp;
95 
96  void *tag = alt_tag_[alt_idx];
97  alt_tag_[alt_idx] = alt_tag_[alt];
98  alt_tag_[alt] = tag;
99  }
100  }
101  }
102 }
103 
// NOTE(review): the declarator line for this body is missing from this
// listing; presumably WordAltList::PrintDebug() -- confirm.
// Writes one line per stored alternate to stderr: its index, the string
// converted to UTF-8, its cost, its length in char_32 units, and then the
// numeric value of each char_32 unit separated by spaces.
105  for (int alt_idx = 0; alt_idx < alt_cnt_; alt_idx++) {
106  char_32 *word_32 = word_alt_[alt_idx];
107  string word_str;
108  CubeUtils::UTF32ToUTF8(word_32, &word_str);
109  int num_unichars = CubeUtils::StrLen(word_32);
110  fprintf(stderr, "Alt[%d]=%s (cost=%d, num_unichars=%d); unichars=", alt_idx,
111  word_str.c_str(), alt_cost_[alt_idx], num_unichars);
// char_32 is a signed int in this code base, so %d matches each unit.
112  for (int i = 0; i < num_unichars; ++i)
113  fprintf(stderr, "%d ", word_32[i]);
114  fprintf(stderr, "\n");
115  }
116 }
117 } // namespace tesseract
WordAltList(int max_alt)
void ** alt_tag_
Definition: altlist.h:57
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:272
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:54
signed int char_32
Definition: string_32.h:40
static int StrCmp(const char_32 *str1, const char_32 *str2)
Definition: cube_utils.cpp:66
bool Insert(char_32 *char_ptr, int cost, void *tag=NULL)