tesseract  3.05.02
pageiterator.cpp
Go to the documentation of this file.
1 // File: pageiterator.cpp
3 // Description: Iterator for tesseract page structure that avoids using
4 // tesseract internal data structures.
5 // Author: Ray Smith
6 // Created: Fri Feb 26 14:32:09 PST 2010
7 //
8 // (C) Copyright 2010, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #include "pageiterator.h"
22 #include "allheaders.h"
23 #include "helpers.h"
24 #include "pageres.h"
25 #include "tesseractclass.h"
26 
27 namespace tesseract {
28 
30  int scaled_yres, int rect_left, int rect_top,
31  int rect_width, int rect_height)
32  : page_res_(page_res),
33  tesseract_(tesseract),
34  word_(NULL),
35  word_length_(0),
36  blob_index_(0),
37  cblob_it_(NULL),
38  include_upper_dots_(false),
39  include_lower_dots_(false),
40  scale_(scale),
41  scaled_yres_(scaled_yres),
42  rect_left_(rect_left),
43  rect_top_(rect_top),
44  rect_width_(rect_width),
45  rect_height_(rect_height) {
46  it_ = new PAGE_RES_IT(page_res);
48 }
49 
51  delete it_;
52  delete cblob_it_;
53 }
54 
61  : page_res_(src.page_res_),
62  tesseract_(src.tesseract_),
63  word_(NULL),
64  word_length_(src.word_length_),
65  blob_index_(src.blob_index_),
66  cblob_it_(NULL),
67  include_upper_dots_(src.include_upper_dots_),
68  include_lower_dots_(src.include_lower_dots_),
69  scale_(src.scale_),
70  scaled_yres_(src.scaled_yres_),
71  rect_left_(src.rect_left_),
72  rect_top_(src.rect_top_),
73  rect_width_(src.rect_width_),
74  rect_height_(src.rect_height_) {
75  it_ = new PAGE_RES_IT(*src.it_);
77 }
78 
80  page_res_ = src.page_res_;
81  tesseract_ = src.tesseract_;
84  scale_ = src.scale_;
86  rect_left_ = src.rect_left_;
87  rect_top_ = src.rect_top_;
90  delete it_;
91  it_ = new PAGE_RES_IT(*src.it_);
93  return *this;
94 }
95 
97  return (it_ == NULL && it_ == other) ||
98  ((other != NULL) && (it_ != NULL) && (*it_ == *other));
99 }
100 
101 // ============= Moving around within the page ============.
102 
106  BeginWord(0);
107 }
108 
110  if (it_->block() == NULL) return; // At end of the document.
111  PAGE_RES_IT para(page_res_);
112  PAGE_RES_IT next_para(para);
113  next_para.forward_paragraph();
114  while (next_para.cmp(*it_) <= 0) {
115  para = next_para;
116  next_para.forward_paragraph();
117  }
118  *it_ = para;
119  BeginWord(0);
120 }
121 
123  PageIterator p_start(*this);
124  p_start.RestartParagraph();
125  return p_start.it_->row() == it_->row();
126 }
127 
129  it_->restart_row();
130  BeginWord(0);
131 }
132 
147  if (it_->block() == NULL) return false; // Already at the end!
148  if (it_->word() == NULL)
149  level = RIL_BLOCK;
150 
151  switch (level) {
152  case RIL_BLOCK:
153  it_->forward_block();
154  break;
155  case RIL_PARA:
157  break;
158  case RIL_TEXTLINE:
159  for (it_->forward_with_empties(); it_->row() == it_->prev_row();
161  break;
162  case RIL_WORD:
164  break;
165  case RIL_SYMBOL:
166  if (cblob_it_ != NULL)
167  cblob_it_->forward();
168  ++blob_index_;
169  if (blob_index_ >= word_length_)
171  else
172  return true;
173  break;
174  }
175  BeginWord(0);
176  return it_->block() != NULL;
177 }
178 
185  if (it_->block() == NULL) return false; // Already at the end!
186  if (it_->word() == NULL) return true; // In an image block.
187  switch (level) {
188  case RIL_BLOCK:
189  return blob_index_ == 0 && it_->block() != it_->prev_block();
190  case RIL_PARA:
191  return blob_index_ == 0 &&
192  (it_->block() != it_->prev_block() ||
193  it_->row()->row->para() != it_->prev_row()->row->para());
194  case RIL_TEXTLINE:
195  return blob_index_ == 0 && it_->row() != it_->prev_row();
196  case RIL_WORD:
197  return blob_index_ == 0;
198  case RIL_SYMBOL:
199  return true;
200  }
201  return false;
202 }
203 
209  PageIteratorLevel element) const {
210  if (Empty(element)) return true; // Already at the end!
211  // The result is true if we step forward by element and find we are
212  // at the end of the page or at beginning of *all* levels in:
213  // [level, element).
214  // When there is more than one level difference between element and level,
215  // we could for instance move forward one symbol and still be at the first
216  // word on a line, so we also have to be at the first symbol in a word.
217  PageIterator next(*this);
218  next.Next(element);
219  if (next.Empty(element)) return true; // Reached the end of the page.
220  while (element > level) {
221  element = static_cast<PageIteratorLevel>(element - 1);
222  if (!next.IsAtBeginningOf(element))
223  return false;
224  }
225  return true;
226 }
227 
// Orders this iterator relative to other.
// The result of it_->cmp(*other.it_) is returned unchanged whenever it is
// non-zero; blob_index_ is consulted only as a tie-breaker when that
// comparison reports 0.
// In the tie-break case the return value is -1 if this blob_index_ is
// smaller than other's, 0 if they are equal, and 1 otherwise.
234 int PageIterator::Cmp(const PageIterator &other) const {
235  int word_cmp = it_->cmp(*other.it_);
236  if (word_cmp != 0)
237  return word_cmp;
  // Same position according to it_->cmp(): fall back to the symbol index.
238  if (blob_index_ < other.blob_index_)
239  return -1;
240  if (blob_index_ == other.blob_index_)
241  return 0;
242  return 1;
243 }
244 
245 // ============= Accessing data ==============.
246 // Coordinate system:
247 // Integer coordinates are at the cracks between the pixels.
248 // The top-left corner of the top-left pixel in the image is at (0,0).
249 // The bottom-right corner of the bottom-right pixel in the image is at
250 // (width, height).
251 // Every bounding box goes from the top-left of the top-left contained
252 // pixel to the bottom-right of the bottom-right contained pixel, so
253 // the bounding box of the single top-left pixel in the image is:
254 // (0,0)->(1,1).
255 // If an image rectangle has been set in the API, then returned coordinates
256 // relate to the original (full) image, rather than the rectangle.
257 
265  int* left, int* top,
266  int* right, int* bottom) const {
267  if (Empty(level))
268  return false;
269  TBOX box;
270  PARA *para = NULL;
271  switch (level) {
272  case RIL_BLOCK:
275  break;
276  case RIL_PARA:
277  para = it_->row()->row->para();
278  // explicit fall-through.
279  case RIL_TEXTLINE:
282  break;
283  case RIL_WORD:
286  break;
287  case RIL_SYMBOL:
288  if (cblob_it_ == NULL)
289  box = it_->word()->box_word->BlobBox(blob_index_);
290  else
291  box = cblob_it_->data()->bounding_box();
292  }
293  if (level == RIL_PARA) {
294  PageIterator other = *this;
295  other.Begin();
296  do {
297  if (other.it_->block() &&
298  other.it_->block()->block == it_->block()->block &&
299  other.it_->row() && other.it_->row()->row &&
300  other.it_->row()->row->para() == para) {
301  box = box.bounding_union(other.it_->row()->row->bounding_box());
302  }
303  } while (other.Next(RIL_TEXTLINE));
304  }
305  if (level != RIL_SYMBOL || cblob_it_ != NULL)
306  box.rotate(it_->block()->block->re_rotation());
307  // Now that we have a box in tesseract coordinates relative to the image
308  // rectangle, we have to convert the coords to a top-down system.
309  const int pix_height = pixGetHeight(tesseract_->pix_binary());
310  const int pix_width = pixGetWidth(tesseract_->pix_binary());
311  *left = ClipToRange(static_cast<int>(box.left()), 0, pix_width);
312  *top = ClipToRange(pix_height - box.top(), 0, pix_height);
313  *right = ClipToRange(static_cast<int>(box.right()), *left, pix_width);
314  *bottom = ClipToRange(pix_height - box.bottom(), *top, pix_height);
315  return true;
316 }
317 
325  int* left, int* top,
326  int* right, int* bottom) const {
327  return BoundingBox(level, 0, left, top, right, bottom);
328 }
329 
// Computes the bounding box of the current element at the given level and
// writes it to *left, *top, *right, *bottom, expanded by padding on each side.
// Returns false immediately if BoundingBoxInternal() returns false; otherwise
// the values produced by BoundingBoxInternal() are divided by scale_, offset
// by rect_left_ / rect_top_, padded, clipped with ClipToRange, and true is
// returned.
// NOTE(review): the continuation lines of the *left and *top ClipToRange
// calls (their lower/upper bounds) are absent from this listing, so the
// clipping limits for those two outputs cannot be read from here.
330 bool PageIterator::BoundingBox(PageIteratorLevel level, const int padding,
331  int* left, int* top,
332  int* right, int* bottom) const {
333  if (!BoundingBoxInternal(level, left, top, right, bottom))
334  return false;
335  // Convert to the coordinate system of the original image.
336  *left = ClipToRange(*left / scale_ + rect_left_ - padding,
338  *top = ClipToRange(*top / scale_ + rect_top_ - padding,
  // right/bottom add (scale_ - 1) before dividing, which rounds up under
  // integer division (assumes scale_ is an int -- TODO confirm in the header).
  // Their lower bounds are the already-computed *left / *top, so the result
  // never has right < left or bottom < top.
340  *right = ClipToRange((*right + scale_ - 1) / scale_ + rect_left_ + padding,
341  *left, rect_left_ + rect_width_);
342  *bottom = ClipToRange((*bottom + scale_ - 1) / scale_ + rect_top_ + padding,
343  *top, rect_top_ + rect_height_);
344  return true;
345 }
346 
349  if (it_->block() == NULL) return true; // Already at the end!
350  if (it_->word() == NULL && level != RIL_BLOCK) return true; // image block
351  if (level == RIL_SYMBOL && blob_index_ >= word_length_)
352  return true; // Zero length word, or already at the end of it.
353  return false;
354 }
355 
358  if (it_->block() == NULL || it_->block()->block == NULL)
359  return PT_UNKNOWN; // Already at the end!
360  if (it_->block()->block->poly_block() == NULL)
361  return PT_FLOWING_TEXT; // No layout analysis used - assume text.
362  return it_->block()->block->poly_block()->isA();
363 }
364 
368  if (it_->block() == NULL || it_->block()->block == NULL)
369  return NULL; // Already at the end!
370  if (it_->block()->block->poly_block() == NULL)
371  return NULL; // No layout analysis used - no polygon.
372  ICOORDELT_IT it(it_->block()->block->poly_block()->points());
373  Pta* pta = ptaCreate(it.length());
374  int num_pts = 0;
375  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), ++num_pts) {
376  ICOORD* pt = it.data();
377  // Convert to top-down coords within the input image.
378  float x = static_cast<float>(pt->x()) / scale_ + rect_left_;
379  float y = rect_top_ + rect_height_ - static_cast<float>(pt->y()) / scale_;
380  ptaAddPt(pta, x, y);
381  }
382  return pta;
383 }
384 
408  int left, top, right, bottom;
409  if (!BoundingBoxInternal(level, &left, &top, &right, &bottom))
410  return NULL;
411  if (level == RIL_SYMBOL && cblob_it_ != NULL &&
412  cblob_it_->data()->area() != 0)
413  return cblob_it_->data()->render();
414  Box* box = boxCreate(left, top, right - left, bottom - top);
415  Pix* pix = pixClipRectangle(tesseract_->pix_binary(), box, NULL);
416  boxDestroy(&box);
417  if (level == RIL_BLOCK || level == RIL_PARA) {
418  // Clip to the block polygon as well.
419  TBOX mask_box;
420  Pix* mask = it_->block()->block->render_mask(&mask_box);
421  int mask_x = left - mask_box.left();
422  int mask_y = top - (tesseract_->ImageHeight() - mask_box.top());
423  // AND the mask and pix, putting the result in pix.
424  pixRasterop(pix, MAX(0, -mask_x), MAX(0, -mask_y), pixGetWidth(pix),
425  pixGetHeight(pix), PIX_SRC & PIX_DST, mask, MAX(0, mask_x),
426  MAX(0, mask_y));
427  pixDestroy(&mask);
428  }
429  return pix;
430 }
431 
444  Pix* original_img,
445  int* left, int* top) const {
446  int right, bottom;
447  if (!BoundingBox(level, left, top, &right, &bottom))
448  return NULL;
449  if (original_img == NULL)
450  return GetBinaryImage(level);
451 
452  // Expand the box.
453  *left = MAX(*left - padding, 0);
454  *top = MAX(*top - padding, 0);
455  right = MIN(right + padding, rect_width_);
456  bottom = MIN(bottom + padding, rect_height_);
457  Box* box = boxCreate(*left, *top, right - *left, bottom - *top);
458  Pix* grey_pix = pixClipRectangle(original_img, box, NULL);
459  boxDestroy(&box);
460  if (level == RIL_BLOCK || level == RIL_PARA) {
461  // Clip to the block polygon as well.
462  TBOX mask_box;
463  Pix* mask = it_->block()->block->render_mask(&mask_box);
464  // Copy the mask registered correctly into an image the size of grey_pix.
465  int mask_x = *left - mask_box.left();
466  int mask_y = *top - (pixGetHeight(original_img) - mask_box.top());
467  int width = pixGetWidth(grey_pix);
468  int height = pixGetHeight(grey_pix);
469  Pix* resized_mask = pixCreate(width, height, 1);
470  pixRasterop(resized_mask, MAX(0, -mask_x), MAX(0, -mask_y), width, height,
471  PIX_SRC, mask, MAX(0, mask_x), MAX(0, mask_y));
472  pixDestroy(&mask);
473  pixDilateBrick(resized_mask, resized_mask, 2 * padding + 1,
474  2 * padding + 1);
475  pixInvert(resized_mask, resized_mask);
476  pixSetMasked(grey_pix, resized_mask, MAX_UINT32);
477  pixDestroy(&resized_mask);
478  }
479  return grey_pix;
480 }
481 
488  int* x1, int* y1, int* x2, int* y2) const {
489  if (it_->word() == NULL) return false; // Already at the end!
490  ROW* row = it_->row()->row;
491  WERD* word = it_->word()->word;
492  TBOX box = (level == RIL_WORD || level == RIL_SYMBOL)
493  ? word->bounding_box()
494  : row->bounding_box();
495  int left = box.left();
496  ICOORD startpt(left, static_cast<inT16>(row->base_line(left) + 0.5));
497  int right = box.right();
498  ICOORD endpt(right, static_cast<inT16>(row->base_line(right) + 0.5));
499  // Rotate to image coordinates and convert to global image coords.
500  startpt.rotate(it_->block()->block->re_rotation());
501  endpt.rotate(it_->block()->block->re_rotation());
502  *x1 = startpt.x() / scale_ + rect_left_;
503  *y1 = (rect_height_ - startpt.y()) / scale_ + rect_top_;
504  *x2 = endpt.x() / scale_ + rect_left_;
505  *y2 = (rect_height_ - endpt.y()) / scale_ + rect_top_;
506  return true;
507 }
508 
510  tesseract::WritingDirection *writing_direction,
511  tesseract::TextlineOrder *textline_order,
512  float *deskew_angle) const {
513  BLOCK* block = it_->block()->block;
514 
515  // Orientation
516  FCOORD up_in_image(0.0, 1.0);
517  up_in_image.unrotate(block->classify_rotation());
518  up_in_image.rotate(block->re_rotation());
519 
520  if (up_in_image.x() == 0.0F) {
521  if (up_in_image.y() > 0.0F) {
522  *orientation = ORIENTATION_PAGE_UP;
523  } else {
524  *orientation = ORIENTATION_PAGE_DOWN;
525  }
526  } else if (up_in_image.x() > 0.0F) {
527  *orientation = ORIENTATION_PAGE_RIGHT;
528  } else {
529  *orientation = ORIENTATION_PAGE_LEFT;
530  }
531 
532  // Writing direction
533  bool is_vertical_text = (block->classify_rotation().x() == 0.0);
534  bool right_to_left = block->right_to_left();
535  *writing_direction =
536  is_vertical_text
538  : (right_to_left
541 
542  // Textline Order
543  bool is_mongolian = false; // TODO(eger): fix me
544  *textline_order = is_vertical_text
545  ? (is_mongolian
549 
550  // Deskew angle
551  FCOORD skew = block->skew(); // true horizontal for textlines
552  *deskew_angle = -skew.angle();
553 }
554 
556  bool *is_list_item,
557  bool *is_crown,
558  int *first_line_indent) const {
560  if (!it_->row() || !it_->row()->row || !it_->row()->row->para() ||
561  !it_->row()->row->para()->model)
562  return;
563 
564  PARA *para = it_->row()->row->para();
565  *is_list_item = para->is_list_item;
566  *is_crown = para->is_very_first_or_continuation;
567  *first_line_indent = para->model->first_indent() -
568  para->model->body_indent();
569  *just = para->model->justification();
570 }
571 
// Sets up the per-word state (word_, word_length_, blob_index_, cblob_it_)
// for the word that it_ currently points at, then advances the symbol
// position to offset.
// Three cases, keyed on it_->word():
//  - NULL word: word_length_ and blob_index_ are zeroed, word_ is cleared and
//    the function returns (cblob_it_ is not touched on this path).
//  - best_choice != NULL: word_length_ is best_choice->length(); when a
//    box_word exists its length must match (ASSERT_HOST), word_ is cleared
//    and cblob_it_ is deleted and reset to NULL.
//  - otherwise: word_ is the underlying WERD, word_length_ is the length of
//    its cblob list, and cblob_it_ (allocated on first use) is set to that
//    list.
// offset is not range-checked against word_length_ in this function.
576 void PageIterator::BeginWord(int offset) {
577  WERD_RES* word_res = it_->word();
578  if (word_res == NULL) {
579  // This is a non-text block, so there is no word.
580  word_length_ = 0;
581  blob_index_ = 0;
582  word_ = NULL;
583  return;
584  }
585  if (word_res->best_choice != NULL) {
586  // Recognition has been done, so we are using the box_word, which
587  // is already baseline denormalized.
588  word_length_ = word_res->best_choice->length();
589  if (word_res->box_word != NULL) {
  // Log the mismatch before the ASSERT_HOST below fires.
  // NOTE(review): one line of the tprintf argument list is absent from this
  // listing.
590  if (word_res->box_word->length() != word_length_) {
591  tprintf("Corrupted word! best_choice[len=%d] = %s, box_word[len=%d]: ",
593  word_res->box_word->length());
594  word_res->box_word->bounding_box().print();
595  }
596  ASSERT_HOST(word_res->box_word->length() == word_length_);
597  }
598  word_ = NULL;
599  // We will be iterating the box_word.
600  delete cblob_it_;
601  cblob_it_ = NULL;
602  } else {
603  // No recognition yet, so a "symbol" is a cblob.
604  word_ = word_res->word;
605  ASSERT_HOST(word_->cblob_list() != NULL);
606  word_length_ = word_->cblob_list()->length();
607  if (cblob_it_ == NULL) cblob_it_ = new C_BLOB_IT;
608  cblob_it_->set_to_list(word_->cblob_list());
609  }
  // Step to the requested symbol: blob_index_ counts up to offset and, when a
  // cblob iterator is in use, it is moved forward once per step.
610  for (blob_index_ = 0; blob_index_ < offset; ++blob_index_) {
611  if (cblob_it_ != NULL)
612  cblob_it_->forward();
613  }
614 }
615 
617  if (it_->word() != NULL) {
618  it_->word()->blamer_bundle = blamer_bundle;
619  return true;
620  } else {
621  return false;
622  }
623 }
624 
625 } // namespace tesseract.
ROW * row
Definition: pageres.h:127
BLOCK * block
Definition: pageres.h:99
Pix * GetBinaryImage(PageIteratorLevel level) const
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
void rotate(const FCOORD &vec)
Definition: rect.h:189
virtual bool IsAtBeginningOf(PageIteratorLevel level) const
virtual bool IsAtFinalElement(PageIteratorLevel level, PageIteratorLevel element) const
ParagraphJustification
Definition: publictypes.h:239
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: werd.cpp:164
TBOX bounding_box() const
Definition: ocrrow.h:85
const TBOX & bounding_box() const
Definition: boxword.h:82
int cmp(const PAGE_RES_IT &other) const
Definition: pageres.cpp:1201
PARA * para() const
Definition: ocrrow.h:115
PageIterator(PAGE_RES *page_res, Tesseract *tesseract, int scale, int scaled_yres, int rect_left, int rect_top, int rect_width, int rect_height)
WERD_RES * word() const
Definition: pageres.h:736
integer coordinate
Definition: points.h:30
BLOCK_RES * prev_block() const
Definition: pageres.h:733
float base_line(float xpos) const
Definition: ocrrow.h:56
const STRING & unichar_string() const
Definition: ratngs.h:525
ICOORDELT_LIST * points()
Definition: polyblk.h:42
bool IsWithinFirstTextlineOfParagraph() const
bool BoundingBox(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
#define MIN(x, y)
Definition: ndminx.h:28
bool SetWordBlamerBundle(BlamerBundle *blamer_bundle)
PolyBlockType isA() const
Definition: polyblk.h:48
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
ROW_RES * row() const
Definition: pageres.h:739
int length() const
Definition: boxword.h:85
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
WERD_RES * forward_with_empties()
Definition: pageres.h:720
TESS_LOCAL void BeginWord(int offset)
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool BoundingBoxInternal(PageIteratorLevel level, int *left, int *top, int *right, int *bottom) const
Definition: ocrpara.h:29
WERD_RES * forward_paragraph()
Definition: pageres.cpp:1652
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
void ParagraphInfo(tesseract::ParagraphJustification *justification, bool *is_list_item, bool *is_crown, int *first_line_indent) const
PolyBlockType BlockType() const
WERD_RES * forward_block()
Definition: pageres.cpp:1667
inT16 bottom() const
Definition: rect.h:61
const ParagraphModel * model
Definition: ocrpara.h:36
FCOORD skew() const
Definition: ocrblock.h:150
tesseract::BoxWord * box_word
Definition: pageres.h:250
Pix * render_mask(TBOX *mask_box)
Definition: ocrblock.h:164
int Cmp(const PageIterator &other) const
FCOORD re_rotation() const
Definition: ocrblock.h:138
const char * string() const
Definition: strngs.cpp:201
bool PositionedAtSameWord(const PAGE_RES_IT *other) const
bool is_very_first_or_continuation
Definition: ocrpara.h:43
void rotate(const FCOORD vec)
Definition: ipoints.h:471
Definition: werd.h:60
int body_indent() const
Definition: ocrpara.h:169
inT16 x() const
access function
Definition: points.h:52
Pta * BlockPolygon() const
BlamerBundle * blamer_bundle
Definition: pageres.h:230
Pix * pix_binary() const
WERD_RES * restart_page_with_empties()
Definition: pageres.h:686
bool Empty(PageIteratorLevel level) const
virtual bool Next(PageIteratorLevel level)
tesseract::ParagraphJustification justification() const
Definition: ocrpara.h:164
inT16 left() const
Definition: rect.h:68
float y() const
Definition: points.h:212
void print() const
Definition: rect.h:270
int length() const
Definition: ratngs.h:301
virtual void RestartRow()
virtual void RestartParagraph()
void rotate(const FCOORD &vec)
Definition: ipoints.h:241
ROW_RES * prev_row() const
Definition: pageres.h:730
BLOCK_RES * block() const
Definition: pageres.h:742
const PageIterator & operator=(const PageIterator &src)
bool is_list_item
Definition: ocrpara.h:38
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: ocrrow.cpp:85
#define MAX(x, y)
Definition: ndminx.h:24
bool Baseline(PageIteratorLevel level, int *x1, int *y1, int *x2, int *y2) const
#define tprintf(...)
Definition: tprintf.h:31
Definition: ocrrow.h:32
#define MAX_UINT32
Definition: host.h:56
Definition: points.h:189
Definition: ocrblock.h:30
const TBOX & BlobBox(int index) const
Definition: boxword.h:86
inT16 top() const
Definition: rect.h:54
WERD_RES * restart_row()
Definition: pageres.cpp:1637
bool right_to_left() const
Definition: ocrblock.h:83
FCOORD classify_rotation() const
Definition: ocrblock.h:144
float x() const
Definition: points.h:209
int first_indent() const
Definition: ocrpara.h:168
Pix * GetImage(PageIteratorLevel level, int padding, Pix *original_img, int *left, int *top) const
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void unrotate(const FCOORD &vec)
Definition: ipoints.h:480
WERD * word
Definition: pageres.h:175
PolyBlockType
Definition: publictypes.h:41
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
float angle() const
find angle
Definition: points.h:249
#define ASSERT_HOST(x)
Definition: errcode.h:84
TBOX restricted_bounding_box(bool upper_dots, bool lower_dots) const
Definition: ocrblock.cpp:91
TBOX bounding_box() const
Definition: werd.cpp:160
inT16 y() const
access_function
Definition: points.h:56