136 if (cube_alt_list == NULL || cube_alt_list->AltCount() <= 0)
140 char_32 *cube_best_str32 = cube_alt_list->Alt(0);
143 string cube_best_str;
144 int cube_best_cost = cube_alt_list->AltCost(0);
145 int cube_best_bigram_cost = 0;
146 bool cube_best_bigram_cost_valid =
true;
148 cube_best_bigram_cost = cube_cntxt_->
Bigrams()->
151 cube_best_bigram_cost_valid =
false;
159 *agreement = (tess_str.compare(cube_best_str) == 0);
163 string cube_next_best_str;
165 if (cube_alt_list->AltCount() > 1) {
166 cube_next_best_str32 = cube_alt_list->Alt(1);
167 if (cube_next_best_str32 == NULL ||
171 cube_next_best_cost = cube_alt_list->AltCost(1);
176 for (tess_rank = 0; tess_rank < cube_alt_list->AltCount(); tess_rank++) {
179 if (alt_str == tess_str)
185 int tess_cost = cube_obj->WordCost(tess_str.c_str());
187 int tess_bigram_cost = 0;
188 int tess_bigram_cost_valid =
true;
190 tess_bigram_cost = cube_cntxt_->
Bigrams()->
193 tess_bigram_cost_valid =
false;
196 features->push_back(tess_confidence);
198 features->push_back(tess_cost);
200 features->push_back(tess_rank);
202 features->push_back(tess_str.length());
204 features->push_back(
ValidWord(tess_str));
205 if (tess_bigram_cost_valid) {
207 features->push_back(tess_bigram_cost);
210 features->push_back(cube_best_cost);
212 features->push_back(cube_next_best_cost);
214 features->push_back(cube_best_str.length());
216 features->push_back(
ValidWord(cube_best_str));
217 if (cube_best_bigram_cost_valid) {
219 features->push_back(cube_best_bigram_cost);
222 int compare_nocase_punc = CompareStrings(cube_best_str,
223 tess_str,
false,
true);
224 features->push_back(compare_nocase_punc == 0);
226 int compare_case_nopunc = CompareStrings(cube_best_str,
227 tess_str,
true,
false);
228 features->push_back(compare_case_nopunc == 0);
230 int compare_nocase_nopunc = CompareStrings(cube_best_str,
231 tess_str,
true,
true);
232 features->push_back(compare_nocase_nopunc == 0);
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
static int StrLen(const char_32 *str)
CharSet * CharacterSet() const
basic_string< char_32 > string_32
bool ValidWord(const std::string &str)
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
CharBigrams * Bigrams() const