tesseract  3.05.02
cube_search_object.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: cube_search_object.cpp
3  * Description: Implementation of the Cube Search Object Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "cube_search_object.h"
21 #include "cube_utils.h"
22 #include "ndminx.h"
23 
24 namespace tesseract {
25 
26 const bool CubeSearchObject::kUseCroppedChars = true;
27 
29  : SearchObject(cntxt) {
30  init_ = false;
31  reco_cache_ = NULL;
32  samp_cache_ = NULL;
33  segments_ = NULL;
34  segment_cnt_ = 0;
35  samp_ = samp;
36  left_ = 0;
37  itop_ = 0;
38  space_cost_ = NULL;
39  no_space_cost_ = NULL;
40  wid_ = samp_->Width();
41  hgt_ = samp_->Height();
42  max_seg_per_char_ = cntxt_->Params()->MaxSegPerChar();
44  min_spc_gap_ =
45  static_cast<int>(hgt_ * cntxt_->Params()->MinSpaceHeightRatio());
46  max_spc_gap_ =
47  static_cast<int>(hgt_ * cntxt_->Params()->MaxSpaceHeightRatio());
48 }
49 
51  Cleanup();
52 }
53 
54 // Cleanup
55 void CubeSearchObject::Cleanup() {
56  // delete Recognition Cache
57  if (reco_cache_) {
58  for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) {
59  if (reco_cache_[strt_seg]) {
60  for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) {
61  if (reco_cache_[strt_seg][end_seg]) {
62  delete reco_cache_[strt_seg][end_seg];
63  }
64  }
65  delete []reco_cache_[strt_seg];
66  }
67  }
68  delete []reco_cache_;
69  reco_cache_ = NULL;
70  }
71 
72  // delete CharSamp Cache
73  if (samp_cache_) {
74  for (int strt_seg = 0; strt_seg < segment_cnt_; strt_seg++) {
75  if (samp_cache_[strt_seg]) {
76  for (int end_seg = 0; end_seg < segment_cnt_; end_seg++) {
77  if (samp_cache_[strt_seg][end_seg]) {
78  delete samp_cache_[strt_seg][end_seg];
79  }
80  }
81  delete []samp_cache_[strt_seg];
82  }
83  }
84  delete []samp_cache_;
85  samp_cache_ = NULL;
86  }
87 
88  // delete segment list
89  if (segments_) {
90  for (int seg = 0; seg < segment_cnt_; seg++) {
91  if (segments_[seg]) {
92  delete segments_[seg];
93  }
94  }
95  delete []segments_;
96  segments_ = NULL;
97  }
98 
99  if (space_cost_) {
100  delete []space_cost_;
101  space_cost_ = NULL;
102  }
103 
104  if (no_space_cost_) {
105  delete []no_space_cost_;
106  no_space_cost_ = NULL;
107  }
108 
109  segment_cnt_ = 0;
110  init_ = false;
111 }
112 
113 // # of segmentation points. One less than the count of segments
115  if (!init_ && !Init())
116  return -1;
117  return segment_cnt_ - 1;
118 }
119 
120 // init and allocate variables, perform segmentation
121 bool CubeSearchObject::Init() {
122  if (init_)
123  return true;
124  if (!Segment()) {
125  return false;
126  }
127 
128  // init cache
129  reco_cache_ = new CharAltList **[segment_cnt_];
130 
131  samp_cache_ = new CharSamp **[segment_cnt_];
132 
133  for (int seg = 0; seg < segment_cnt_; seg++) {
134  reco_cache_[seg] = new CharAltList *[segment_cnt_];
135  memset(reco_cache_[seg], 0, segment_cnt_ * sizeof(*reco_cache_[seg]));
136 
137  samp_cache_[seg] = new CharSamp *[segment_cnt_];
138  memset(samp_cache_[seg], 0, segment_cnt_ * sizeof(*samp_cache_[seg]));
139  }
140 
141  init_ = true;
142  return true;
143 }
144 
145 // returns a char sample corresponding to the bitmap between 2 seg pts
146 CharSamp *CubeSearchObject::CharSample(int start_pt, int end_pt) {
147  // init if necessary
148  if (!init_ && !Init())
149  return NULL;
150  // validate segment range
151  if (!IsValidSegmentRange(start_pt, end_pt))
152  return NULL;
153 
154  // look for the samp in the cache
155  if (samp_cache_ && samp_cache_[start_pt + 1] &&
156  samp_cache_[start_pt + 1][end_pt]) {
157  return samp_cache_[start_pt + 1][end_pt];
158  }
159  // create a char samp object from the specified range of segments
160  bool left_most;
161  bool right_most;
162  CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
163  end_pt - start_pt, NULL,
164  &left_most, &right_most, hgt_);
165  if (!samp)
166  return NULL;
167 
168  if (kUseCroppedChars) {
169  CharSamp *cropped_samp = samp->Crop();
170  // we no longer need the orig sample
171  delete samp;
172  if (!cropped_samp)
173  return NULL;
174  samp = cropped_samp;
175  }
176 
177  // get the dimensions of the new cropped sample
178  int char_top = samp->Top();
179  int char_wid = samp->Width();
180  int char_hgt = samp->Height();
181 
182  // for cursive languages, these features correspond to whether
183  // the charsamp is at the beginning or end of conncomp
184  if (cntxt_->Cursive() == true) {
185  // first and last char flags depend on reading order
186  bool first_char = rtl_ ? right_most : left_most;
187  bool last_char = rtl_ ? left_most : right_most;
188 
189  samp->SetFirstChar(first_char ? 255 : 0);
190  samp->SetLastChar(last_char ? 255 : 0);
191  } else {
192  // for non cursive languages, these features correspond
193  // to whether the charsamp is at the beginning or end of the word
194  samp->SetFirstChar((start_pt == -1) ? 255 : 0);
195  samp->SetLastChar((end_pt == (segment_cnt_ - 1)) ? 255 : 0);
196  }
197  samp->SetNormTop(255 * char_top / hgt_);
198  samp->SetNormBottom(255 * (char_top + char_hgt) / hgt_);
199  samp->SetNormAspectRatio(255 * char_wid / (char_wid + char_hgt));
200 
201  // add to cache & return
202  samp_cache_[start_pt + 1][end_pt] = samp;
203  return samp;
204 }
205 
206 Box *CubeSearchObject::CharBox(int start_pt, int end_pt) {
207  if (!init_ && !Init())
208  return NULL;
209  if (!IsValidSegmentRange(start_pt, end_pt)) {
210  fprintf(stderr, "Cube ERROR (CubeSearchObject::CharBox): invalid "
211  "segment range (%d, %d)\n", start_pt, end_pt);
212  return NULL;
213  }
214 
215  // create a char samp object from the specified range of segments,
216  // extract its dimensions into a leptonica box, and delete it
217  bool left_most;
218  bool right_most;
219  CharSamp *samp = CharSamp::FromConComps(segments_, start_pt + 1,
220  end_pt - start_pt, NULL,
221  &left_most, &right_most, hgt_);
222  if (!samp)
223  return NULL;
224  if (kUseCroppedChars) {
225  CharSamp *cropped_samp = samp->Crop();
226  delete samp;
227  if (!cropped_samp) {
228  return NULL;
229  }
230  samp = cropped_samp;
231  }
232  Box *box = boxCreate(samp->Left(), samp->Top(),
233  samp->Width(), samp->Height());
234  delete samp;
235  return box;
236 }
237 
238 // call from Beam Search to return the alt list corresponding to
239 // recognizing the bitmap between two segmentation pts
240 CharAltList * CubeSearchObject::RecognizeSegment(int start_pt, int end_pt) {
241  // init if necessary
242  if (!init_ && !Init()) {
243  fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
244  "not initialize CubeSearchObject\n");
245  return NULL;
246  }
247 
248  // validate segment range
249  if (!IsValidSegmentRange(start_pt, end_pt)) {
250  fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): invalid "
251  "segment range (%d, %d)\n", start_pt, end_pt);
252  return NULL;
253  }
254 
255  // look for the recognition results in cache in the cache
256  if (reco_cache_ && reco_cache_[start_pt + 1] &&
257  reco_cache_[start_pt + 1][end_pt]) {
258  return reco_cache_[start_pt + 1][end_pt];
259  }
260 
261  // create the char sample corresponding to the blob
262  CharSamp *samp = CharSample(start_pt, end_pt);
263  if (!samp) {
264  fprintf(stderr, "Cube ERROR (CubeSearchObject::RecognizeSegment): could "
265  "not construct CharSamp\n");
266  return NULL;
267  }
268 
269  // recognize the char sample
270  CharClassifier *char_classifier = cntxt_->Classifier();
271  if (char_classifier) {
272  reco_cache_[start_pt + 1][end_pt] = char_classifier->Classify(samp);
273  } else {
274  // no classifer: all characters are equally probable; add a penalty
275  // that favors 2-segment characters and aspect ratios (w/h) > 1
276  fprintf(stderr, "Cube WARNING (CubeSearchObject::RecognizeSegment): cube "
277  "context has no character classifier!! Inventing a probability "
278  "distribution.\n");
279  int class_cnt = cntxt_->CharacterSet()->ClassCount();
280  CharAltList *alt_list = new CharAltList(cntxt_->CharacterSet(), class_cnt);
281  int seg_cnt = end_pt - start_pt;
282  double prob_val = (1.0 / class_cnt) *
283  exp(-fabs(seg_cnt - 2.0)) *
284  exp(-samp->Width() / static_cast<double>(samp->Height()));
285 
286  for (int class_idx = 0; class_idx < class_cnt; class_idx++) {
287  alt_list->Insert(class_idx, CubeUtils::Prob2Cost(prob_val));
288  }
289  reco_cache_[start_pt + 1][end_pt] = alt_list;
290  }
291 
292  return reco_cache_[start_pt + 1][end_pt];
293 }
294 
295 // Perform segmentation of the bitmap by detecting connected components,
296 // segmenting each connected component using windowed vertical pixel density
297 // histogram and sorting the resulting segments in reading order
298 bool CubeSearchObject::Segment() {
299  if (!samp_)
300  return false;
301  segment_cnt_ = 0;
302  segments_ = samp_->Segment(&segment_cnt_, rtl_,
303  cntxt_->Params()->HistWindWid(),
305  if (!segments_ || segment_cnt_ <= 0) {
306  return false;
307  }
308  if (segment_cnt_ >= kMaxSegmentCnt) {
309  return false;
310  }
311  return true;
312 }
313 
314 // computes the space and no space costs at gaps between segments
315 bool CubeSearchObject::ComputeSpaceCosts() {
316  // init if necessary
317  if (!init_ && !Init())
318  return false;
319 
320  // Already computed
321  if (space_cost_)
322  return true;
323 
324  // No segmentation points
325  if (segment_cnt_ < 2)
326  return false;
327 
328  // Compute the maximum x to the left of and minimum x to the right of each
329  // segmentation point
330  int *max_left_x = new int[segment_cnt_ - 1];
331  int *min_right_x = new int[segment_cnt_ - 1];
332  if (rtl_) {
333  min_right_x[0] = segments_[0]->Left();
334  max_left_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Right();
335  for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
336  min_right_x[pt_idx] =
337  MIN(min_right_x[pt_idx - 1], segments_[pt_idx]->Left());
338  max_left_x[segment_cnt_ - pt_idx - 2] =
339  MAX(max_left_x[segment_cnt_ - pt_idx - 1],
340  segments_[segment_cnt_ - pt_idx - 1]->Right());
341  }
342  } else {
343  min_right_x[segment_cnt_ - 2] = segments_[segment_cnt_ - 1]->Left();
344  max_left_x[0] = segments_[0]->Right();
345  for (int pt_idx = 1; pt_idx < (segment_cnt_ - 1); pt_idx++) {
346  min_right_x[segment_cnt_ - pt_idx - 2] =
347  MIN(min_right_x[segment_cnt_ - pt_idx - 1],
348  segments_[segment_cnt_ - pt_idx - 1]->Left());
349  max_left_x[pt_idx] =
350  MAX(max_left_x[pt_idx - 1], segments_[pt_idx]->Right());
351  }
352  }
353 
354  // Allocate memory for space and no space costs
355  // trivial cases
356  space_cost_ = new int[segment_cnt_ - 1];
357  no_space_cost_ = new int[segment_cnt_ - 1];
358 
359  // go through all segmentation points determining the horizontal gap between
360  // the images on both sides of each break points. Use the gap to estimate
361  // the probability of a space. The probability is modeled a linear function
362  // of the gap width
363  for (int pt_idx = 0; pt_idx < (segment_cnt_ - 1); pt_idx++) {
364  // determine the gap at the segmentation point
365  int gap = min_right_x[pt_idx] - max_left_x[pt_idx];
366  float prob = 0.0;
367 
368  // gap is too small => no space
369  if (gap < min_spc_gap_ || max_spc_gap_ == min_spc_gap_) {
370  prob = 0.0;
371  } else if (gap > max_spc_gap_) {
372  // gap is too big => definite space
373  prob = 1.0;
374  } else {
375  // gap is somewhere in between, compute probability
376  prob = (gap - min_spc_gap_) /
377  static_cast<double>(max_spc_gap_ - min_spc_gap_);
378  }
379 
380  // compute cost of space and non-space
381  space_cost_[pt_idx] = CubeUtils::Prob2Cost(prob) +
383  no_space_cost_[pt_idx] = CubeUtils::Prob2Cost(1.0 - prob);
384  }
385 
386  delete []min_right_x;
387  delete []max_left_x;
388 
389  return true;
390 }
391 
392 // Returns the cost of having a space before the specified segmentation point
394  if (!space_cost_ && !ComputeSpaceCosts()) {
395  // Failed to compute costs return a zero prob
396  return CubeUtils::Prob2Cost(0.0);
397  }
398  return space_cost_[pt_idx];
399 }
400 
401 // Returns the cost of not having a space before the specified
402 // segmentation point
404  // If failed to compute costs, return a 1.0 prob
405  if (!space_cost_ && !ComputeSpaceCosts())
406  return CubeUtils::Prob2Cost(0.0);
407  return no_space_cost_[pt_idx];
408 }
409 
410 // Returns the cost of not having any spaces within the specified range
411 // of segmentation points
412 int CubeSearchObject::NoSpaceCost(int st_pt, int end_pt) {
413  // If fail to compute costs, return a 1.0 prob
414  if (!space_cost_ && !ComputeSpaceCosts())
415  return CubeUtils::Prob2Cost(1.0);
416  int no_spc_cost = 0;
417  for (int pt_idx = st_pt + 1; pt_idx < end_pt; pt_idx++)
418  no_spc_cost += NoSpaceCost(pt_idx);
419  return no_spc_cost;
420 }
421 }
double MaxSpaceHeightRatio() const
Definition: tuning_params.h:61
unsigned short Height() const
Definition: bmp_8.h:50
ReadOrder ReadingOrder() const
CubeSearchObject(CubeRecoContext *cntxt, CharSamp *samp)
virtual CharAltList * Classify(CharSamp *char_samp)=0
bool Insert(int class_id, int cost, void *tag=NULL)
void SetFirstChar(unsigned short first_char)
Definition: char_samp.h:96
void SetNormAspectRatio(unsigned short norm_aspect_ratio)
Definition: char_samp.h:93
CharSamp * Crop()
Definition: char_samp.cpp:338
#define MIN(x, y)
Definition: ndminx.h:28
unsigned short Width() const
Definition: bmp_8.h:48
int Left() const
Definition: con_comp.h:65
CharClassifier * Classifier() const
static int Prob2Cost(double prob_val)
Definition: cube_utils.cpp:37
unsigned short Top() const
Definition: char_samp.h:48
int ClassCount() const
Definition: char_set.h:111
TuningParams * Params() const
ConComp ** Segment(int *seg_cnt, bool right_2_left, int max_hist_wnd, int min_con_comp_size) const
Definition: char_samp.cpp:372
void SetNormBottom(unsigned short norm_bottom)
Definition: char_samp.h:90
double MinSpaceHeightRatio() const
Definition: tuning_params.h:60
CharSet * CharacterSet() const
#define MAX(x, y)
Definition: ndminx.h:24
Box * CharBox(int start_pt, int end_pt)
CharSamp * CharSample(int start_pt, int end_pt)
void SetNormTop(unsigned short norm_top)
Definition: char_samp.h:89
CharAltList * RecognizeSegment(int start_pt, int end_pt)
CubeRecoContext * cntxt_
Definition: search_object.h:51
unsigned short Left() const
Definition: char_samp.h:46
static CharSamp * FromConComps(ConComp **concomp_array, int strt_concomp, int seg_flags_size, int *seg_flags, bool *left_most, bool *right_most, int word_hgt)
Definition: char_samp.cpp:439
int MinConCompSize() const
Definition: tuning_params.h:58
int Right() const
Definition: con_comp.h:67
void SetLastChar(unsigned short last_char)
Definition: char_samp.h:99