tesseract  3.05.02
lang_model.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: lang_model.h
3  * Description: Declaration of the Language Model Base Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The LangModel class abstracts a state machine that is modeled as a trie
21 // structure. The state machine models the language being recognized by the OCR
22 // engine.
23 // This is an abstract class that is to be inherited by any language model.
24 
25 #ifndef LANG_MODEL_H
26 #define LANG_MODEL_H
27 
28 #include "lang_mod_edge.h"
29 #include "char_altlist.h"
30 #include "char_set.h"
31 #include "tuning_params.h"
32 
33 namespace tesseract {
34 class LangModel {
35  public:
37  ood_enabled_ = true;
38  numeric_enabled_ = true;
39  word_list_enabled_ = true;
40  punc_enabled_ = true;
41  }
42  virtual ~LangModel() {}
43 
44  // Returns an edge pointer to the Root
45  virtual LangModEdge *Root() = 0;
46  // Returns the edges that fan-out of the specified edge and their count
47  virtual LangModEdge **GetEdges(CharAltList *alt_list,
48  LangModEdge *parent_edge,
49  int *edge_cnt) = 0;
50  // Returns is a sequence of 32-bit characters are valid within this language
51  // model or net. And EndOfWord flag is specified. If true, the sequence has
52  // to end on a valid word. The function also optionally returns the list
53  // of language model edges traversed to parse the string
54  virtual bool IsValidSequence(const char_32 *str, bool eow_flag,
55  LangModEdge **edge_array = NULL) = 0;
56  virtual bool IsLeadingPunc(char_32 ch) = 0;
57  virtual bool IsTrailingPunc(char_32 ch) = 0;
58  virtual bool IsDigit(char_32 ch) = 0;
59 
60  // accessor functions
61  inline bool OOD() { return ood_enabled_; }
62  inline bool Numeric() { return numeric_enabled_; }
63  inline bool WordList() { return word_list_enabled_; }
64  inline bool Punc() { return punc_enabled_; }
65  inline void SetOOD(bool ood) { ood_enabled_ = ood; }
66  inline void SetNumeric(bool numeric) { numeric_enabled_ = numeric; }
67  inline void SetWordList(bool word_list) { word_list_enabled_ = word_list; }
68  inline void SetPunc(bool punc_enabled) { punc_enabled_ = punc_enabled; }
69 
70  protected:
75 };
76 }
77 
78 #endif // LANG_MODEL_H
virtual LangModEdge * Root()=0
virtual bool IsValidSequence(const char_32 *str, bool eow_flag, LangModEdge **edge_array=NULL)=0
void SetOOD(bool ood)
Definition: lang_model.h:65
virtual LangModEdge ** GetEdges(CharAltList *alt_list, LangModEdge *parent_edge, int *edge_cnt)=0
virtual bool IsLeadingPunc(char_32 ch)=0
virtual ~LangModel()
Definition: lang_model.h:42
void SetPunc(bool punc_enabled)
Definition: lang_model.h:68
virtual bool IsTrailingPunc(char_32 ch)=0
signed int char_32
Definition: string_32.h:40
virtual bool IsDigit(char_32 ch)=0
void SetNumeric(bool numeric)
Definition: lang_model.h:66
void SetWordList(bool word_list)
Definition: lang_model.h:67