tesseract  3.05.02
cube_tuning_params.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_tuning_params.cpp
3  * Description: Implementation of the CubeTuningParameters Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include <string>
21 #include <vector>
22 #include "cube_tuning_params.h"
23 #include "tuning_params.h"
24 #include "cube_utils.h"
25 
26 namespace tesseract {
28  reco_wgt_ = 1.0;
29  size_wgt_ = 1.0;
30  char_bigrams_wgt_ = 1.0;
31  word_unigrams_wgt_ = 0.0;
33  beam_width_ = 32;
35  tp_feat_ = BMP;
36  conv_grid_size_ = 32;
37  hist_wind_wid_ = 0;
44  ood_wgt_ = 1.0;
45  num_wgt_ = 1.0;
46 
47 }
48 
50 }
51 
52 // Create an Object given the data file path and the language by loading
53 // the appropriate file.
// The file that gets loaded is data_file_path + lang + ".cube.params"; the
// pieces are joined by plain string concatenation, no separator is inserted.
// Returns obj when Load() succeeds. When Load() fails, an error is printed to
// stderr, obj is deleted and NULL is returned.
54 CubeTuningParams *CubeTuningParams::Create(const string &data_file_path,
55  const string &lang) {
 // NOTE(review): this listing jumps from its line 55 to 57, so the statement
 // that declares obj is not visible here -- confirm against the original file.
57 
58  string tuning_params_file;
59  tuning_params_file = data_file_path + lang;
60  tuning_params_file += ".cube.params";
61 
 // On failure, report the file name that was tried and release the object.
62  if (!obj->Load(tuning_params_file)) {
63  fprintf(stderr, "Cube ERROR (CubeTuningParams::Create): unable to "
64  "load tuning parameters from %s\n", tuning_params_file.c_str());
65  delete obj;
66  obj = NULL;
67  }
68 
69  return obj;
70 }
71 
72 // Loads the params file
// Reads tuning_params_file into a string, splits it using the delimiters
// "\r\n", and treats every resulting entry as a "Name=Value" pair whose value
// is stored in the member selected by Name.
// Returns false, after printing a message to stderr, when:
//   - the file cannot be read,
//   - fewer than 8 entries are found,
//   - an entry does not split into exactly two tokens on "=",
//   - a value that starts like a number cannot be parsed by sscanf,
//   - the Classifier / FeatureType value or the parameter name is not one of
//     the strings compared against below.
// Returns true once every entry has been processed.
// NOTE(review): members assigned by earlier entries keep their new values
// when a later entry makes this function return false.
73 bool CubeTuningParams::Load(string tuning_params_file) {
74  // load the string into memory
75  string param_str;
76 
77  if (CubeUtils::ReadFileToString(tuning_params_file, &param_str) == false) {
78  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unable to read "
79  "file %s\n", tuning_params_file.c_str());
80  return false;
81  }
82 
83  // split into lines
 // (presumably each character of "\r\n" acts as a delimiter -- confirm in
 // CubeUtils::SplitStringUsing)
84  vector<string> str_vec;
85  CubeUtils::SplitStringUsing(param_str, "\r\n", &str_vec);
86  if (str_vec.size() < 8) {
87  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): number of rows "
88  "in parameter file is too low\n");
89  return false;
90  }
91 
92  // for all entries
 // NOTE(review): entry is a signed int compared against the unsigned
 // str_vec.size().
93  for (int entry = 0; entry < str_vec.size(); entry++) {
94  // tokenize
95  vector<string> str_tok;
96 
97  // should be only two tokens
98  CubeUtils::SplitStringUsing(str_vec[entry], "=", &str_tok);
99  if (str_tok.size() != 2) {
100  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format in "
101  "line: %s.\n", str_vec[entry].c_str());
102  return false;
103  }
104 
 // Only attempt a numeric parse when the value starts with a digit, a sign
 // or a dot; otherwise val keeps its initial 0 (the Classifier and
 // FeatureType branches below compare the raw token text instead).
 // NOTE(review): a non-numeric value given for a numeric parameter is
 // therefore stored as 0 without any error.
105  double val = 0;
106  char peekchar = (str_tok[1].c_str())[0];
107  if ((peekchar >= '0' && peekchar <= '9') ||
108  peekchar == '-' || peekchar == '+' ||
109  peekchar == '.') {
110  // read the value
111  if (sscanf(str_tok[1].c_str(), "%lf", &val) != 1) {
112  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid format "
113  "in line: %s.\n", str_vec[entry].c_str());
114  return false;
115  }
116  }
117 
118  // token type
 // NOTE(review): several branches below have no visible body because this
 // listing skips their lines (133, 135, 143, 145, 147, 160, 162, 164, 168
 // in the listing's own numbering); presumably each assigns the matching
 // member -- confirm against the original file.
119  if (str_tok[0] == "RecoWgt") {
120  reco_wgt_ = val;
121  } else if (str_tok[0] == "SizeWgt") {
122  size_wgt_ = val;
123  } else if (str_tok[0] == "CharBigramsWgt") {
124  char_bigrams_wgt_ = val;
125  } else if (str_tok[0] == "WordUnigramsWgt") {
126  word_unigrams_wgt_ = val;
127  } else if (str_tok[0] == "MaxSegPerChar") {
128  max_seg_per_char_ = static_cast<int>(val);
129  } else if (str_tok[0] == "BeamWidth") {
130  beam_width_ = static_cast<int>(val);
131  } else if (str_tok[0] == "Classifier") {
132  if (str_tok[1] == "NN") {
134  } else if (str_tok[1] == "HYBRID_NN") {
136  } else {
137  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid "
138  "classifier type in line: %s.\n", str_vec[entry].c_str());
139  return false;
140  }
141  } else if (str_tok[0] == "FeatureType") {
142  if (str_tok[1] == "BMP") {
144  } else if (str_tok[1] == "CHEBYSHEV") {
146  } else if (str_tok[1] == "HYBRID") {
148  } else {
149  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): invalid feature "
150  "type in line: %s.\n", str_vec[entry].c_str());
151  return false;
152  }
153  } else if (str_tok[0] == "ConvGridSize") {
154  conv_grid_size_ = static_cast<int>(val);
 // NOTE(review): the next two assignments take the double val without the
 // static_cast<int> used above; Save() prints both members with %d, so
 // they are presumably ints and this is an implicit conversion -- confirm.
155  } else if (str_tok[0] == "HistWindWid") {
156  hist_wind_wid_ = val;
157  } else if (str_tok[0] == "MinConCompSize") {
158  min_con_comp_size_ = val;
159  } else if (str_tok[0] == "MaxWordAspectRatio") {
161  } else if (str_tok[0] == "MinSpaceHeightRatio") {
163  } else if (str_tok[0] == "MaxSpaceHeightRatio") {
165  } else if (str_tok[0] == "CombinerRunThresh") {
166  combiner_run_thresh_ = val;
167  } else if (str_tok[0] == "CombinerClassifierThresh") {
169  } else if (str_tok[0] == "OODWgt") {
170  ood_wgt_ = val;
171  } else if (str_tok[0] == "NumWgt") {
172  num_wgt_ = val;
173  } else {
 // Any name not matched above is rejected rather than ignored.
174  fprintf(stderr, "Cube ERROR (CubeTuningParams::Load): unknown parameter "
175  "in line: %s.\n", str_vec[entry].c_str());
176  return false;
177  }
178  }
179 
180  return true;
181 }
182 
183 // Save the parameters to a file
// Opens file_name with mode "wb" and writes one "Name=Value" line per
// parameter, using the same names that Load() compares against.
// Returns false, after printing a message to stderr, if the file cannot be
// opened; returns true otherwise.
// NOTE(review): the results of fprintf and fclose are not checked, so a
// failed write still returns true.
// NOTE(review): no "Classifier=" or "FeatureType=" line is written by the
// code visible here, although Load() accepts both names.
184 bool CubeTuningParams::Save(string file_name) {
185  FILE *params_file = fopen(file_name.c_str(), "wb");
186  if (params_file == NULL) {
187  fprintf(stderr, "Cube ERROR (CubeTuningParams::Save): error opening file "
188  "%s for write.\n", file_name.c_str());
189  return false;
190  }
191 
 // Weights and ratios are written with four decimals, integer settings
 // with %d.
192  fprintf(params_file, "RecoWgt=%.4f\n", reco_wgt_);
193  fprintf(params_file, "SizeWgt=%.4f\n", size_wgt_);
194  fprintf(params_file, "CharBigramsWgt=%.4f\n", char_bigrams_wgt_);
195  fprintf(params_file, "WordUnigramsWgt=%.4f\n", word_unigrams_wgt_);
196  fprintf(params_file, "MaxSegPerChar=%d\n", max_seg_per_char_);
197  fprintf(params_file, "BeamWidth=%d\n", beam_width_);
198  fprintf(params_file, "ConvGridSize=%d\n", conv_grid_size_);
199  fprintf(params_file, "HistWindWid=%d\n", hist_wind_wid_);
200  fprintf(params_file, "MinConCompSize=%d\n", min_con_comp_size_);
201  fprintf(params_file, "MaxWordAspectRatio=%.4f\n", max_word_aspect_ratio_);
202  fprintf(params_file, "MinSpaceHeightRatio=%.4f\n", min_space_height_ratio_);
203  fprintf(params_file, "MaxSpaceHeightRatio=%.4f\n", max_space_height_ratio_);
204  fprintf(params_file, "CombinerRunThresh=%.4f\n", combiner_run_thresh_);
 // NOTE(review): the argument line that completes the next call (line 206
 // in the listing's own numbering) is missing from this listing.
205  fprintf(params_file, "CombinerClassifierThresh=%.4f\n",
207  fprintf(params_file, "OODWgt=%.4f\n", ood_wgt_);
208  fprintf(params_file, "NumWgt=%.4f\n", num_wgt_);
209 
210  fclose(params_file);
211  return true;
212 }
213 }
type_classifer tp_classifier_
bool Load(string file_name)
static bool ReadFileToString(const string &file_name, string *str)
Definition: cube_utils.cpp:189
static CubeTuningParams * Create(const string &data_file, const string &lang)
static void SplitStringUsing(const string &str, const string &delims, vector< string > *str_vec)
Definition: cube_utils.cpp:220