40 void WordListLangModel::Cleanup() {
49 bool WordListLangModel::Init() {
72 if (Init() ==
false) {
83 if (tess_lm_edge == NULL) {
86 edge_ref = tess_lm_edge->
EndEdge();
100 edge_array + (*edge_cnt));
117 vector<WERD_CHOICE *> *word_variants) {
118 int str_len = str32.length();
120 if (word_so_far->
length() > 0) {
121 word_variants->push_back(
new WERD_CHOICE(*word_so_far));
125 for (
int len = 1; len <= str_len; len++) {
127 string_32 str_pref32 = str32.substr(0, len);
128 int class_id = char_set.
ClassID(reinterpret_cast<const char_32 *>(
129 str_pref32.c_str()));
133 string_32 new_prefix_str32 = prefix_str32 + str_pref32;
136 WordVariants(char_set, new_prefix_str32, word_so_far, new_str32,
149 vector<WERD_CHOICE *> *word_variants) {
150 for (
int i = 0; i < word_variants->size(); i++) {
151 delete (*word_variants)[i];
153 word_variants->clear();
156 WordVariants(char_set, prefix_str32, &word_so_far, str32, word_variants);
161 if (!init_ && !Init()) {
167 if (str32.length() < 1) {
175 if (char_32_ptr == NULL) {
179 vector<WERD_CHOICE *> word_variants;
181 char_32_ptr, &word_variants);
183 if (word_variants.size() > 0) {
185 int shortest_word = 0;
186 for (
int word = 1; word < word_variants.size(); word++) {
187 if (word_variants[shortest_word]->length() >
188 word_variants[word]->length()) {
189 shortest_word = word;
195 for (
int i = 0; i < word_variants.size(); i++) {
delete word_variants[i]; }
const UNICHARSET * TessUnicharset() const
bool AddString32(const char_32 *char_32_ptr)
NODE_REF next_node(EDGE_REF edge_ref) const
bool add_word_to_dawg(const WERD_CHOICE &word, const GenericVector< bool > *repetitions)
static int CreateChildren(CubeRecoContext *cntxt, const Dawg *edges, NODE_REF edge_reg, LangModEdge **lm_edges)
static void WordVariants(const CharSet &char_set, const UNICHARSET *uchset, string_32 str32, vector< WERD_CHOICE *> *word_variants)
void remove_last_unichar_id()
LangModEdge ** GetEdges(CharAltList *alt_list, LangModEdge *edge, int *edge_cnt)
WordListLangModel(CubeRecoContext *cntxt)
CharSet * CharacterSet() const
bool AddString(const char *char_ptr)
basic_string< char_32 > string_32
static void UTF8ToUTF32(const char *utf8_str, string_32 *str32)
int ClassID(const char_32 *str) const
bool IsValidSequence(const char_32 *sequence, bool eow_flag, LangModEdge **edges)
void append_unichar_id(UNICHAR_ID unichar_id, int blob_count, float rating, float certainty)