tesseract  3.05.02
cube_reco_context.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_reco_context.cpp
3  * Description: Implementation of the Cube Recognition Context Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <string>
21 #include <limits.h>
22 
23 #include "cube_reco_context.h"
24 
25 #include "classifier_factory.h"
26 #include "cube_tuning_params.h"
27 #include "dict.h"
28 #include "feature_bmp.h"
29 #include "tessdatamanager.h"
30 #include "tesseractclass.h"
31 #include "tess_lang_model.h"
32 
33 namespace tesseract {
34 
43  tess_obj_ = tess_obj;
44  lang_ = "";
45  loaded_ = false;
46  lang_mod_ = NULL;
47  params_ = NULL;
48  char_classifier_ = NULL;
49  char_set_ = NULL;
50  word_size_model_ = NULL;
51  char_bigrams_ = NULL;
52  word_unigrams_ = NULL;
53  noisy_input_ = false;
54  size_normalization_ = false;
55 }
56 
58  delete char_classifier_;
59  char_classifier_ = NULL;
60 
61  delete word_size_model_;
62  word_size_model_ = NULL;
63 
64  delete char_set_;
65  char_set_ = NULL;
66 
67  delete char_bigrams_;
68  char_bigrams_ = NULL;
69 
70  delete word_unigrams_;
71  word_unigrams_ = NULL;
72 
73  delete lang_mod_;
74  lang_mod_ = NULL;
75 
76  delete params_;
77  params_ = NULL;
78 }
79 
84 bool CubeRecoContext::GetDataFilePath(string *path) const {
85  *path = tess_obj_->datadir.string();
86  return true;
87 }
88 
101 bool CubeRecoContext::Load(TessdataManager *tessdata_manager,
102  UNICHARSET *tess_unicharset) {
103  ASSERT_HOST(tess_obj_ != NULL);
104  tess_unicharset_ = tess_unicharset;
105  string data_file_path;
106 
107  // Get the data file path.
108  if (GetDataFilePath(&data_file_path) == false) {
109  fprintf(stderr, "Unable to get data file path\n");
110  return false;
111  }
112 
113  // Get the language from the Tesseract object.
114  lang_ = tess_obj_->lang.string();
115 
116  // Create the char set.
117  if ((char_set_ =
118  CharSet::Create(tessdata_manager, tess_unicharset)) == NULL) {
119  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
120  "CharSet\n");
121  return false;
122  }
123  // Create the language model.
124  string lm_file_name = data_file_path + lang_ + ".cube.lm";
125  string lm_params;
126  if (!CubeUtils::ReadFileToString(lm_file_name, &lm_params)) {
127  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read cube "
128  "language model params from %s\n", lm_file_name.c_str());
129  return false;
130  }
131  lang_mod_ = new TessLangModel(lm_params, data_file_path,
132  tess_obj_->getDict().load_system_dawg,
133  tessdata_manager, this);
134 
135  // Create the optional char bigrams object.
136  char_bigrams_ = CharBigrams::Create(data_file_path, lang_);
137 
138  // Create the optional word unigrams object.
139  word_unigrams_ = WordUnigrams::Create(data_file_path, lang_);
140 
141  // Create the optional size model.
142  word_size_model_ = WordSizeModel::Create(data_file_path, lang_,
143  char_set_, Contextual());
144 
145  // Load tuning params.
146  params_ = CubeTuningParams::Create(data_file_path, lang_);
147  if (params_ == NULL) {
148  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to read "
149  "CubeTuningParams from %s\n", data_file_path.c_str());
150  return false;
151  }
152 
153  // Create the char classifier.
154  char_classifier_ = CharClassifierFactory::Create(data_file_path, lang_,
155  lang_mod_, char_set_,
156  params_);
157  if (char_classifier_ == NULL) {
158  fprintf(stderr, "Cube ERROR (CubeRecoContext::Load): unable to load "
159  "CharClassifierFactory object from %s\n", data_file_path.c_str());
160  return false;
161  }
162 
163  loaded_ = true;
164 
165  return true;
166 }
167 
170  TessdataManager *tessdata_manager,
171  UNICHARSET *tess_unicharset) {
172  // create the object
173  CubeRecoContext *cntxt = new CubeRecoContext(tess_obj);
174  // load the necessary components
175  if (cntxt->Load(tessdata_manager, tess_unicharset) == false) {
176  fprintf(stderr, "Cube ERROR (CubeRecoContext::Create): unable to init "
177  "CubeRecoContext object\n");
178  delete cntxt;
179  return NULL;
180  }
181  // success
182  return cntxt;
183 }
184 } // tesseract}
static CharSet * Create(TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
Definition: char_set.cpp:54
STRING lang
Definition: ccutil.h:67
static CharBigrams * Create(const string &data_file_path, const string &lang)
STRING datadir
Definition: ccutil.h:65
static bool ReadFileToString(const string &file_name, string *str)
Definition: cube_utils.cpp:189
static CubeRecoContext * Create(Tesseract *tess_obj, TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
static WordUnigrams * Create(const string &data_file_path, const string &lang)
static CharClassifier * Create(const string &data_file_path, const string &lang, LangModel *lang_mod, CharSet *char_set, TuningParams *params)
const char * string() const
Definition: strngs.cpp:201
static CubeTuningParams * Create(const string &data_file, const string &lang)
Dict & getDict()
Definition: classify.h:65
bool GetDataFilePath(string *path) const
bool load_system_dawg
Definition: dict.h:562
static WordSizeModel * Create(const string &data_file_path, const string &lang, CharSet *char_set, bool contextual)
CubeRecoContext(Tesseract *tess_obj)
#define ASSERT_HOST(x)
Definition: errcode.h:84