tesseract  3.05.02
cube_control.cpp
Go to the documentation of this file.
1 /******************************************************************
2  * File: cube_control.cpp
3  * Description: Tesseract class methods for invoking cube convolutional
4  * neural network word recognizer.
5  * Author: Raquel Romano
6  * Created: September 2009
7  *
8  * (C) Copyright 2009, Google Inc.
9  ** Licensed under the Apache License, Version 2.0 (the "License");
10  ** you may not use this file except in compliance with the License.
11  ** You may obtain a copy of the License at
12  ** http://www.apache.org/licenses/LICENSE-2.0
13  ** Unless required by applicable law or agreed to in writing, software
14  ** distributed under the License is distributed on an "AS IS" BASIS,
15  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16  ** See the License for the specific language governing permissions and
17  ** limitations under the License.
18  **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "allheaders.h"
26 
27 #include "cube_object.h"
28 #include "cube_reco_context.h"
29 #include "tesseractclass.h"
31 
32 namespace tesseract {
33 
// Maps a cube probability onto the scale used for tesseract certainties:
// the result is (prob - 1) * 20, so a probability of 1.0 yields 0 and a
// probability of 0.0 yields -20.
static float convert_prob_to_tess_certainty(float prob) {
  // Both constants are doubles on purpose: the subtraction and the
  // multiplication are carried out in double precision and the value is
  // narrowed to float only when it is returned.
  const double kProbOffset = 1.0;
  const double kCertaintyScale = 20.0;
  const double shifted_prob = prob - kProbOffset;
  return static_cast<float>(shifted_prob * kCertaintyScale);
}
43 
52 TBOX char_box_to_tbox(Box* char_box, TBOX word_box, int x_offset) {
53  l_int32 left;
54  l_int32 top;
55  l_int32 width;
56  l_int32 height;
57  l_int32 right;
58  l_int32 bottom;
59 
60  boxGetGeometry(char_box, &left, &top, &width, &height);
61  left += word_box.left() - x_offset;
62  right = left + width;
63  top = word_box.bottom() + word_box.height() - top;
64  bottom = top - height;
65  return TBOX(left, bottom, right, top);
66 }
67 
// Tail of the extract_cube_state definition: retrieves the character samples
// and character bounding boxes of cube's most recent recognition by
// backtracking through the beam search.
// NOTE(review): the line that opens this definition (return type, name and
// the cube_obj parameter) is absent from this listing; the index at the end
// of the page gives the signature as
//   bool extract_cube_state(CubeObject *cube_obj, int *num_chars,
//                           Boxa **char_boxes, CharSamp ***char_samples).
//
// Outputs: *char_samples receives the array returned by BackTrack();
// num_chars and char_boxes are forwarded to BackTrack() unchanged.
// Returns false when cube_obj is NULL, when either the search object or the
// beam search object cannot be retrieved, or when BackTrack() returns NULL;
// returns true otherwise. Warnings are printed only if cube_debug_level > 0.
76  int* num_chars,
77  Boxa** char_boxes,
78  CharSamp*** char_samples) {
79  if (!cube_obj) {
80  if (cube_debug_level > 0) {
81  tprintf("Cube WARNING (extract_cube_state): Invalid cube object "
82  "passed to extract_cube_state\n");
83  }
84  return false;
85  }
86 
87  // Note that the CubeObject accessors return either the deslanted or
88  // regular objects search object or beam search object, whichever
89  // was used in the last call to Recognize()
90  CubeSearchObject* cube_search_obj = cube_obj->SrchObj();
91  if (!cube_search_obj) {
92  if (cube_debug_level > 0) {
93  tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
94  "cube's search object in extract_cube_state.\n");
95  }
96  return false;
97  }
98  BeamSearch *beam_search_obj = cube_obj->BeamObj();
99  if (!beam_search_obj) {
100  if (cube_debug_level > 0) {
101  tprintf("Cube WARNING (Extract_cube_state): Could not retrieve "
102  "cube's beam search object in extract_cube_state.\n");
103  }
104  return false;
105  }
106 
107  // Get the character samples and bounding boxes by backtracking
108  // through the beam search path
109  int best_node_index = beam_search_obj->BestPresortedNodeIndex();
// The NULL argument is BackTrack()'s str32 output, which is not wanted here.
110  *char_samples = beam_search_obj->BackTrack(
111  cube_search_obj, best_node_index, num_chars, NULL, char_boxes);
// Unlike the failures above, a NULL BackTrack() result is reported to the
// caller without printing a warning.
112  if (!*char_samples)
113  return false;
114  return true;
115 }
116 
126 bool Tesseract::create_cube_box_word(Boxa *char_boxes,
127  int num_chars,
128  TBOX word_box,
129  BoxWord* box_word) {
130  if (!box_word) {
131  if (cube_debug_level > 0) {
132  tprintf("Cube WARNING (create_cube_box_word): Invalid box_word.\n");
133  }
134  return false;
135  }
136 
137  // Find the x-coordinate of left-most char_box, which could be
138  // nonzero if the word image was padded before recognition took place.
139  int x_offset = -1;
140  for (int i = 0; i < num_chars; ++i) {
141  Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
142  if (x_offset < 0 || char_box->x < x_offset) {
143  x_offset = char_box->x;
144  }
145  boxDestroy(&char_box);
146  }
147 
148  for (int i = 0; i < num_chars; ++i) {
149  Box* char_box = boxaGetBox(char_boxes, i, L_CLONE);
150  TBOX tbox = char_box_to_tbox(char_box, word_box, x_offset);
151  boxDestroy(&char_box);
152  box_word->InsertBox(i, tbox);
153  }
154  return true;
155 }
156 
164 bool Tesseract::init_cube_objects(bool load_combiner,
165  TessdataManager *tessdata_manager) {
166  ASSERT_HOST(cube_cntxt_ == NULL);
167  ASSERT_HOST(tess_cube_combiner_ == NULL);
168 
169  // Create the cube context object
170  cube_cntxt_ = CubeRecoContext::Create(this, tessdata_manager, &unicharset);
171  if (cube_cntxt_ == NULL) {
172  if (cube_debug_level > 0) {
173  tprintf("Cube WARNING (Tesseract::init_cube_objects()): Failed to "
174  "instantiate CubeRecoContext\n");
175  }
176  return false;
177  }
178 
179  // Create the combiner object and load the combiner net for target languages.
180  if (load_combiner) {
181  tess_cube_combiner_ = new tesseract::TesseractCubeCombiner(cube_cntxt_);
182  if (!tess_cube_combiner_->LoadCombinerNet()) {
183  delete cube_cntxt_;
184  cube_cntxt_ = NULL;
185  delete tess_cube_combiner_;
186  tess_cube_combiner_ = NULL;
187  if (cube_debug_level > 0)
188  tprintf("Cube ERROR (Failed to instantiate TesseractCubeCombiner\n");
189  return false;
190  }
191  }
192  return true;
193 }
194 
// Body of run_cube_combiner: for each word of page_res whose tesseract
// certainty is below the combiner run threshold, recognizes the word with
// cube and lets cube_combine_word decide which result to keep.
// NOTE(review): the signature line of this definition is absent from this
// listing; the index at the end of the page gives it as
//   void run_cube_combiner(PAGE_RES *page_res).
// Does nothing when page_res or tess_cube_combiner_ is NULL.
202  if (page_res == NULL || tess_cube_combiner_ == NULL)
203  return;
204  PAGE_RES_IT page_res_it(page_res);
205  // Iterate through the word results and call cube on each word.
206  for (page_res_it.restart_page(); page_res_it.word () != NULL;
207  page_res_it.forward()) {
208  BLOCK* block = page_res_it.block()->block;
209  if (block->poly_block() != NULL && !block->poly_block()->IsText())
210  continue; // Don't deal with non-text blocks.
211  WERD_RES* word = page_res_it.word();
212  // Skip cube entirely if tesseract's certainty is greater than threshold.
// NOTE(review): convert_prob_to_tess_certainty returns float, but the result
// is stored in an int, so any fractional part of the threshold is discarded
// before the comparison below -- confirm whether that is intended.
// NOTE(review): the threshold does not depend on the word, yet it is
// recomputed on every iteration.
213  int combiner_run_thresh = convert_prob_to_tess_certainty(
214  cube_cntxt_->Params()->CombinerRunThresh());
215  if (word->best_choice->certainty() >= combiner_run_thresh) {
216  continue;
217  }
218  // Use the same language as Tesseract used for the word.
219  Tesseract* lang_tess = word->tesseract;
220 
221  // Setup a trial WERD_RES in which to classify with cube.
222  WERD_RES cube_word;
223  cube_word.InitForRetryRecognition(*word);
// NOTE(review): listing line 225 is absent below. The index at the end of the
// page shows SetupForRecognition taking ten parameters, while only nine
// arguments are visible here; the missing one sits in the norm_mode position.
224  cube_word.SetupForRecognition(lang_tess->unicharset, this, BestPix(),
226  NULL, false, false, false,
227  page_res_it.row()->row,
228  page_res_it.block()->block);
229  CubeObject *cube_obj = lang_tess->cube_recognize_word(
230  page_res_it.block()->block, &cube_word);
// A NULL cube_obj means cube produced no usable result for this word, so the
// tesseract result is left untouched.
231  if (cube_obj != NULL)
232  lang_tess->cube_combine_word(cube_obj, &cube_word, word);
// The CubeObject returned by cube_recognize_word is released here.
233  delete cube_obj;
234  }
235 }
236 
243 void Tesseract::cube_word_pass1(BLOCK* block, ROW *row, WERD_RES *word) {
244  CubeObject *cube_obj = cube_recognize_word(block, word);
245  delete cube_obj;
246 }
247 
// Body of cube_recognize_word: creates a CubeObject for the word's bounding
// box in cube_binary_ and runs cube_recognize on it.
// NOTE(review): the signature line of this definition is absent from this
// listing; the index at the end of the page gives it as
//   CubeObject * cube_recognize_word(BLOCK *block, WERD_RES *word).
//
// Returns the new CubeObject on success; both callers visible in this file
// delete it. Returns NULL when there is no binary image or cube context, when
// the block is rotated, or when cube_recognize fails. In the first two cases
// the word is set up as a fake result via SetupFake() here.
255  if (!cube_binary_ || !cube_cntxt_) {
// The message is printed only for a missing image, not for a missing context.
// NOTE(review): the message names "Tesseract::run_cube()", which does not
// match the function name given in the index -- possibly a leftover name.
256  if (cube_debug_level > 0 && !cube_binary_)
257  tprintf("Tesseract::run_cube(): NULL binary image.\n");
258  word->SetupFake(unicharset);
259  return NULL;
260  }
261  TBOX word_box = word->word->bounding_box();
// A re_rotation other than (1, 0) is treated as a rotated block.
262  if (block != NULL && (block->re_rotation().x() != 1.0f ||
263  block->re_rotation().y() != 0.0f)) {
264  // TODO(rays) We have to rotate the bounding box to get the true coords.
265  // This will be achieved in the future via DENORM.
266  // In the mean time, cube can't process this word.
267  if (cube_debug_level > 0) {
268  tprintf("Cube can't process rotated word at:");
269  word_box.print();
270  }
271  word->SetupFake(unicharset);
272  return NULL;
273  }
// The vertical coordinate passed to CubeObject is the image height minus the
// box top (presumably converting to a top-down image row -- confirm).
274  CubeObject* cube_obj = new tesseract::CubeObject(
275  cube_cntxt_, cube_binary_, word_box.left(),
276  pixGetHeight(cube_binary_) - word_box.top(),
277  word_box.width(), word_box.height());
278  if (!cube_recognize(cube_obj, block, word)) {
279  delete cube_obj;
280  return NULL;
281  }
282  return cube_obj;
283 }
284 
// Tail of the cube_combine_word definition: asks the tesseract/cube combiner
// which result should win and updates tess_word accordingly.
// NOTE(review): the start of this definition is absent from this listing; the
// index at the end of the page gives the signature as
//   void cube_combine_word(CubeObject *cube_obj, WERD_RES *cube_word,
//                          WERD_RES *tess_word).
// NOTE(review): the best_choice of both tess_word and cube_word is
// dereferenced without a NULL check -- verify that callers guarantee both.
292  WERD_RES* tess_word) {
293  float combiner_prob = tess_cube_combiner_->CombineResults(tess_word,
294  cube_obj);
295  // If combiner probability is greater than tess/cube combiner
296  // classifier threshold, i.e. tesseract wins, then just return the
297  // tesseract result unchanged, as the combiner knows nothing about how
298  // correct the answer is. If cube and tesseract agree, then improve the
299  // scores before returning.
300  WERD_CHOICE* tess_best = tess_word->best_choice;
301  WERD_CHOICE* cube_best = cube_word->best_choice;
// NOTE(review): listing line 302 is absent here; the closing brace after the
// tprintf below has no visible opening, so a conditional (presumably a debug
// level check) is missing from this listing.
303  tprintf("Combiner prob = %g vs threshold %g\n",
304  combiner_prob, cube_cntxt_->Params()->CombinerClassifierThresh());
305  }
306  if (combiner_prob >=
307  cube_cntxt_->Params()->CombinerClassifierThresh()) {
308  if (tess_best->unichar_string() == cube_best->unichar_string()) {
309  // Cube and tess agree, so improve the scores.
310  tess_best->set_rating(tess_best->rating() / 2);
311  tess_best->set_certainty(tess_best->certainty() / 2);
312  }
313  return;
314  }
315  // Cube wins.
316  // It is better for the language combiner to have all tesseract scores,
317  // so put them in the cube result.
318  cube_best->set_rating(tess_best->rating());
319  cube_best->set_certainty(tess_best->certainty());
// NOTE(review): listing line 320 is absent here as well; the same applies to
// the closing brace after the tprintf below.
321  tprintf("Cube INFO: tesseract result replaced by cube: %s -> %s\n",
322  tess_best->unichar_string().string(),
323  cube_best->unichar_string().string());
324  }
// Replace tess_word's results with those of cube_word.
325  tess_word->ConsumeWordResults(cube_word);
326 }
327 
// Tail of the cube_recognize definition: runs cube on cube_obj and writes
// cube's best alternative, its character boxes and a per-character choice
// list into `word`.
// NOTE(review): the start of this definition is absent from this listing; the
// index at the end of the page gives the signature as
//   bool cube_recognize(CubeObject *cube_obj, BLOCK *block, WERD_RES *word).
// The `block` parameter is not referenced in the visible part of the body.
//
// Returns true on success. On each handled failure the word is set up as a
// fake result via SetupFake() and false is returned.
335  WERD_RES *word) {
336  // Run cube
337  WordAltList *cube_alt_list = cube_obj->RecognizeWord();
338  if (!cube_alt_list || cube_alt_list->AltCount() <= 0) {
339  if (cube_debug_level > 0) {
340  tprintf("Cube returned nothing for word at:");
341  word->word->bounding_box().print();
342  }
343  word->SetupFake(unicharset);
344  return false;
345  }
346 
347  // Get cube's best result and its probability, mapped to tesseract's
348  // certainty range
349  char_32 *cube_best_32 = cube_alt_list->Alt(0);
350  double cube_prob = CubeUtils::Cost2Prob(cube_alt_list->AltCost(0));
351  float cube_certainty = convert_prob_to_tess_certainty(cube_prob);
352  string cube_best_str;
353  CubeUtils::UTF32ToUTF8(cube_best_32, &cube_best_str);
354 
355  // Retrieve Cube's character bounding boxes and CharSamples,
356  // corresponding to the most recent call to RecognizeWord().
357  Boxa *char_boxes = NULL;
358  CharSamp **char_samples = NULL;;
359  int num_chars;
// NOTE(review): the failure branch below is taken only when
// cube_debug_level > 0. With a debug level of 0 a failed extract_cube_state
// falls through, and the code that follows runs with whatever char_boxes,
// char_samples and the uninitialised num_chars hold at that point. This looks
// like the debug check was meant to guard only the tprintf -- confirm.
360  if (!extract_cube_state(cube_obj, &num_chars, &char_boxes, &char_samples)
361  && cube_debug_level > 0) {
362  tprintf("Cube WARNING (Tesseract::cube_recognize): Cannot extract "
363  "cube state.\n");
364  word->SetupFake(unicharset);
365  return false;
366  }
367 
368  // Convert cube's character bounding boxes to a BoxWord.
369  BoxWord cube_box_word;
370  TBOX tess_word_box = word->word->bounding_box();
371  if (word->denorm.block() != NULL)
372  tess_word_box.rotate(word->denorm.block()->re_rotation());
373  bool box_word_success = create_cube_box_word(char_boxes, num_chars,
374  tess_word_box,
375  &cube_box_word);
// char_boxes is no longer needed once the BoxWord has been built.
376  boxaDestroy(&char_boxes);
// NOTE(review): char_samples is not released on this early-return path,
// although it is released with delete [] on the success path below.
377  if (!box_word_success) {
378  if (cube_debug_level > 0) {
379  tprintf("Cube WARNING (Tesseract::cube_recognize): Could not "
380  "create cube BoxWord\n");
381  }
382  word->SetupFake(unicharset);
383  return false;
384  }
385 
386  // Fill tesseract result's fields with cube results
387  fill_werd_res(cube_box_word, cube_best_str.c_str(), word);
388 
389  // Create cube's best choice.
// One BLOB_CHOICE per character, looked up by the sample's string label; each
// is constructed from the same word-level values -cube_certainty and
// cube_certainty.
390  BLOB_CHOICE** choices = new BLOB_CHOICE*[num_chars];
391  for (int i = 0; i < num_chars; ++i) {
392  UNICHAR_ID uch_id =
393  cube_cntxt_->CharacterSet()->UnicharID(char_samples[i]->StrLabel());
394  choices[i] = new BLOB_CHOICE(uch_id, -cube_certainty, cube_certainty,
395  -1, 0.0f, 0.0f, 0.0f, BCC_STATIC_CLASSIFIER);
396  }
397  word->FakeClassifyWord(num_chars, choices);
398  // within a word, cube recognizes the word in reading order.
// NOTE(review): listing line 399 is absent here; the statement that the
// comment above refers to is not visible in this listing.
// Only the two pointer arrays are freed below, not the objects they point to
// (presumably those are owned elsewhere -- confirm).
400  delete [] choices;
401  delete [] char_samples;
402 
403  // Some sanity checks
404  ASSERT_HOST(word->best_choice->length() == word->reject_map.length());
405 
// NOTE(review): listing line 406 is absent here; the closing brace after the
// tprintf below has no visible opening, so a conditional (presumably a debug
// level check) is missing from this listing.
407  tprintf("Cube result: %s r=%g, c=%g\n",
408  word->best_choice->unichar_string().string(),
409  word->best_choice->rating(),
410  word->best_choice->certainty());
411  }
412  return true;
413 }
414 
421 void Tesseract::fill_werd_res(const BoxWord& cube_box_word,
422  const char* cube_best_str,
423  WERD_RES* tess_werd_res) {
424  delete tess_werd_res->box_word;
425  tess_werd_res->box_word = new BoxWord(cube_box_word);
426  tess_werd_res->box_word->ClipToOriginalWord(tess_werd_res->denorm.block(),
427  tess_werd_res->word);
428  // Fill text and remaining fields
429  tess_werd_res->word->set_text(cube_best_str);
430  tess_werd_res->tess_failed = FALSE;
431  tess_werd_res->tess_accepted = tess_acceptable_word(tess_werd_res);
432  // There is no output word, so we can' call AdaptableWord, but then I don't
433  // think we need to. Fudge the result with accepted.
434  tess_werd_res->tess_would_adapt = tess_werd_res->tess_accepted;
435 
436  // Set word to done, i.e., ignore all of tesseract's tests for rejection
437  tess_werd_res->done = tess_werd_res->tess_accepted;
438 }
439 
440 } // namespace tesseract
ROW * row
Definition: pageres.h:127
BLOCK * block
Definition: pageres.h:99
bool init_cube_objects(bool load_combiner, TessdataManager *tessdata_manager)
CubeObject * cube_recognize_word(BLOCK *block, WERD_RES *word)
void rotate(const FCOORD &vec)
Definition: rect.h:189
BOOL8 tess_accepted
Definition: pageres.h:280
bool create_cube_box_word(Boxa *char_boxes, int num_chars, TBOX word_box, BoxWord *box_word)
WERD_RES * word() const
Definition: pageres.h:736
int UnicharID(const char_32 *str) const
Definition: char_set.h:80
BOOL8 tess_would_adapt
Definition: pageres.h:281
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
const STRING & unichar_string() const
Definition: ratngs.h:525
void set_rating(float new_val)
Definition: ratngs.h:367
bool tess_acceptable_word(WERD_RES *word)
Definition: tessbox.cpp:69
bool cube_recognize(CubeObject *cube_obj, BLOCK *block, WERD_RES *word)
double CombinerClassifierThresh() const
Definition: tuning_params.h:63
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
void ConsumeWordResults(WERD_RES *word)
Definition: pageres.cpp:757
inT32 length() const
Definition: rejctmap.h:236
inT16 width() const
Definition: rect.h:111
float certainty() const
Definition: ratngs.h:328
bool set_unichars_in_script_order(bool in_script_order)
Definition: ratngs.h:515
double CombinerRunThresh() const
Definition: tuning_params.h:62
ROW_RES * row() const
Definition: pageres.h:739
char_32 * Alt(int alt_idx)
Definition: word_altlist.h:41
WERD_RES * restart_page()
Definition: pageres.h:683
bool extract_cube_state(CubeObject *cube_obj, int *num_chars, Boxa **char_boxes, CharSamp ***char_samples)
WERD_CHOICE * best_choice
Definition: pageres.h:219
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:95
BOOL8 tess_failed
Definition: pageres.h:272
void fill_werd_res(const BoxWord &cube_box_word, const char *cube_best_str, WERD_RES *tess_werd_res)
static CubeRecoContext * Create(Tesseract *tess_obj, TessdataManager *tessdata_manager, UNICHARSET *tess_unicharset)
void run_cube_combiner(PAGE_RES *page_res)
static double Cost2Prob(int cost)
Definition: cube_utils.cpp:47
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
void SetupFake(const UNICHARSET &uch)
Definition: pageres.cpp:343
TessdataManager tessdata_manager
Definition: ccutil.h:69
void cube_word_pass1(BLOCK *block, ROW *row, WERD_RES *word)
inT16 bottom() const
Definition: rect.h:61
tesseract::BoxWord * box_word
Definition: pageres.h:250
FCOORD re_rotation() const
Definition: ocrblock.h:138
const char * string() const
Definition: strngs.cpp:201
static void UTF32ToUTF8(const char_32 *utf32_str, string *str)
Definition: cube_utils.cpp:272
void set_certainty(float new_val)
Definition: ratngs.h:370
TuningParams * Params() const
const BLOCK * block() const
Definition: normalis.h:275
#define FALSE
Definition: capi.h:46
int BestPresortedNodeIndex() const
Definition: beam_search.h:81
WERD_RES * forward()
Definition: pageres.h:716
bool SetupForRecognition(const UNICHARSET &unicharset_in, tesseract::Tesseract *tesseract, Pix *pix, int norm_mode, const TBOX *norm_box, bool numeric_mode, bool use_body_size, bool allow_detailed_fx, ROW *row, const BLOCK *block)
Definition: pageres.cpp:294
inT16 left() const
Definition: rect.h:68
WordAltList * RecognizeWord(LangModel *lang_mod=NULL)
float y() const
Definition: points.h:212
void print() const
Definition: rect.h:270
int length() const
Definition: ratngs.h:301
float rating() const
Definition: ratngs.h:325
BLOCK_RES * block() const
Definition: pageres.h:742
void InitForRetryRecognition(const WERD_RES &source)
Definition: pageres.cpp:269
inT16 height() const
Definition: rect.h:104
CharSet * CharacterSet() const
signed int char_32
Definition: string_32.h:40
#define tprintf(...)
Definition: tprintf.h:31
void FakeClassifyWord(int blob_count, BLOB_CHOICE **choices)
Definition: pageres.cpp:872
Definition: ocrrow.h:32
bool IsText() const
Definition: polyblk.h:52
Definition: ocrblock.h:30
Pix * BestPix() const
float CombineResults(WERD_RES *tess_res, CubeObject *cube_obj)
BeamSearch * BeamObj() const
Definition: cube_object.h:114
inT16 top() const
Definition: rect.h:54
tesseract::Tesseract * tesseract
Definition: pageres.h:266
int AltCost(int alt_idx) const
Definition: altlist.h:41
float x() const
Definition: points.h:209
Definition: rect.h:30
TBOX char_box_to_tbox(Box *char_box, TBOX word_box, int x_offset)
int AltCount() const
Definition: altlist.h:39
UNICHARSET unicharset
Definition: ccutil.h:70
WERD * word
Definition: pageres.h:175
BOOL8 done
Definition: pageres.h:282
CubeSearchObject * SrchObj() const
Definition: cube_object.h:124
DENORM denorm
Definition: pageres.h:190
#define ASSERT_HOST(x)
Definition: errcode.h:84
TBOX bounding_box() const
Definition: werd.cpp:160
REJMAP reject_map
Definition: pageres.h:271
void cube_combine_word(CubeObject *cube_obj, WERD_RES *cube_word, WERD_RES *tess_word)
void set_text(const char *new_text)
Definition: werd.h:126
int UNICHAR_ID
Definition: unichar.h:33