tesseract  3.05.02
cube_reco_context.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_reco_context.h
3  * Description: Declaration of the Cube Recognition Context Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The CubeRecoContext class abstracts the Cube OCR Engine. Typically a process
21 // (or a thread) would create one CubeRecoContext object per language.
22 // The CubeRecoContext object also provides methods to get and set the
23 // different attributes of the Cube OCR Engine.
24 
25 #ifndef CUBE_RECO_CONTEXT_H
26 #define CUBE_RECO_CONTEXT_H
27 
28 #include <string>
29 #include "neural_net.h"
30 #include "lang_model.h"
31 #include "classifier_base.h"
32 #include "feature_base.h"
33 #include "char_set.h"
34 #include "word_size_model.h"
35 #include "char_bigrams.h"
36 #include "word_unigrams.h"
37 
38 namespace tesseract {
39 
40 class Tesseract;
41 class TessdataManager;
42 
44  public:
45  // Reading order enum type; this is the return type of ReadingOrder().
46  enum ReadOrder {
47  L2R,  // presumably left-to-right. NOTE(review): R2L is used by ReadingOrder() but its enumerator line is not visible in this listing.
49  };
50 
51  // Instantiate using a Tesseract object (tess_obj); presumably kept in tess_obj_, which this class does not own.
52  CubeRecoContext(Tesseract *tess_obj);
53 
55 
56  // Accessor functions: each returns the corresponding private member unchanged.
57  inline const string & Lang() const { return lang_; }  // language string, by const reference (no copy)
58  inline CharSet *CharacterSet() const { return char_set_; }  // non-const pointer: callers can modify the pointee
59  const UNICHARSET *TessUnicharset() const { return tess_unicharset_; }  // the only accessor that hands out a pointer-to-const
60  inline CharClassifier *Classifier() const { return char_classifier_; }
61  inline WordSizeModel *SizeModel() const { return word_size_model_; }
62  inline CharBigrams *Bigrams() const { return char_bigrams_; }
63  inline WordUnigrams *WordUnigramsObj() const { return word_unigrams_; }
64  inline TuningParams *Params() const { return params_; }
65  inline LangModel *LangMod() const { return lang_mod_; }
66 
67  // The reading order of the language: R2L when lang_ is "ara" (presumably Arabic), L2R for every other value.
68  inline ReadOrder ReadingOrder() const {
69  return ((lang_ == "ara") ? R2L : L2R);
70  }
71 
72  // Does the language support case: false when lang_ is "ara" or "hin", true for any other value.
73  inline bool HasCase() const {
74  return (lang_ != "ara" && lang_ != "hin");
75  }
76 
77  inline bool Cursive() const {  // true only when lang_ is "ara"
78  return (lang_ == "ara");
79  }
80 
81  inline bool HasItalics() const {  // false when lang_ is "ara" or "hin", true otherwise (same test as HasCase)
82  return (lang_ != "ara" && lang_ != "hin");
83  }
84 
85  inline bool Contextual() const {  // true only when lang_ is "ara" (same test as Cursive)
86  return (lang_ == "ara");
87  }
88 
89  // RecoContext runtime flag getters: the first two read members of this class, OOD/Numeric/WordList/Punc forward to lang_mod_, and CaseSensitive forwards to char_classifier_.
90  inline bool SizeNormalization() const { return size_normalization_; }
91  inline bool NoisyInput() const { return noisy_input_; }
92  inline bool OOD() const { return lang_mod_->OOD(); }  // lang_mod_ is dereferenced without a null check (here and in the next three getters)
93  inline bool Numeric() const { return lang_mod_->Numeric(); }
94  inline bool WordList() const { return lang_mod_->WordList(); }
95  inline bool Punc() const { return lang_mod_->Punc(); }
96  inline bool CaseSensitive() const {
97  return char_classifier_->CaseSensitive();  // char_classifier_ is dereferenced without a null check
98  }
99 
100  inline void SetSizeNormalization(bool size_normalization) {  // stores the flag in size_normalization_
101  size_normalization_ = size_normalization;
102  }
103  inline void SetNoisyInput(bool noisy_input) {  // stores the flag in noisy_input_
104  noisy_input_ = noisy_input;
105  }
106  inline void SetOOD(bool ood_enabled) {  // forwards to lang_mod_, which is dereferenced without a null check
107  lang_mod_->SetOOD(ood_enabled);
108  }
109  inline void SetNumeric(bool numeric_enabled) {  // forwards to lang_mod_
110  lang_mod_->SetNumeric(numeric_enabled);
111  }
112  inline void SetWordList(bool word_list_enabled) {  // forwards to lang_mod_
113  lang_mod_->SetWordList(word_list_enabled);
114  }
115  inline void SetPunc(bool punc_enabled) {  // forwards to lang_mod_
116  lang_mod_->SetPunc(punc_enabled);
117  }
118  inline void SetCaseSensitive(bool case_sensitive) {  // forwards to char_classifier_, which is dereferenced without a null check
119  char_classifier_->SetCaseSensitive(case_sensitive);
120  }
122  return tess_obj_;
123  }
124 
125  // Returns the path of the data files through *path; the bool result presumably signals success (confirm in the .cpp).
126  bool GetDataFilePath(string *path) const;
127  // Creates a CubeRecoContext object using a tesseract object. Data
128  // files are loaded via the tessdata_manager, and the tesseract
129  // unicharset is provided in order to map Cube's unicharset to
130  // Tesseract's in the case where the two unicharsets differ.
131  static CubeRecoContext *Create(Tesseract *tess_obj,  // static factory; ownership of the result and the failure value are not visible here (confirm in the .cpp)
132  TessdataManager *tessdata_manager,
133  UNICHARSET *tess_unicharset);
134 
135  private:
136  bool loaded_;
137  string lang_;  // returned by Lang(); compared against "ara"/"hin" by the language predicates
138  CharSet *char_set_;  // returned by CharacterSet()
139  UNICHARSET *tess_unicharset_;  // exposed as pointer-to-const by TessUnicharset()
140  WordSizeModel *word_size_model_;  // returned by SizeModel()
141  CharClassifier *char_classifier_;  // returned by Classifier(); target of CaseSensitive()/SetCaseSensitive()
142  CharBigrams *char_bigrams_;  // returned by Bigrams()
143  WordUnigrams *word_unigrams_;  // returned by WordUnigramsObj()
144  TuningParams *params_;  // returned by Params()
145  LangModel *lang_mod_;  // returned by LangMod(); target of the OOD/Numeric/WordList/Punc getters and setters
146  Tesseract *tess_obj_; // CubeRecoContext does not own this pointer
147  bool size_normalization_;  // runtime flag, see SizeNormalization()/SetSizeNormalization()
148  bool noisy_input_;  // runtime flag, see NoisyInput()/SetNoisyInput()
149 
150  // Loads and initializes all the necessary components of a
151  // CubeRecoContext. See .cpp for more details.
152  bool Load(TessdataManager *tessdata_manager,
153  UNICHARSET *tess_unicharset);
154 };
155 }
156 
157 #endif // CUBE_RECO_CONTEXT_H
ReadOrder ReadingOrder() const
const UNICHARSET * TessUnicharset() const
void SetSizeNormalization(bool size_normalization)
void SetCaseSensitive(bool case_sensitive)
void SetOOD(bool ood)
Definition: lang_model.h:65
void SetCaseSensitive(bool case_sensitive)
static CubeRecoContext * Create(Tesseract *tess_obj, TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
CharClassifier * Classifier() const
WordUnigrams * WordUnigramsObj() const
LangModel * LangMod() const
TuningParams * Params() const
tesseract::Tesseract * TesseractObject() const
void SetPunc(bool punc_enabled)
Definition: lang_model.h:68
const string & Lang() const
void SetNoisyInput(bool noisy_input)
void SetWordList(bool word_list_enabled)
CharSet * CharacterSet() const
bool GetDataFilePath(string *path) const
WordSizeModel * SizeModel() const
void SetOOD(bool ood_enabled)
void SetNumeric(bool numeric_enabled)
void SetPunc(bool punc_enabled)
void SetNumeric(bool numeric)
Definition: lang_model.h:66
CubeRecoContext(Tesseract *tess_obj)
CharBigrams * Bigrams() const
void SetWordList(bool word_list)
Definition: lang_model.h:67