tesseract  3.05.02
boxword.cpp
Go to the documentation of this file.
1 // File: boxword.cpp
3 // Description: Class to represent the bounding boxes of the output.
4 // Author: Ray Smith
5 // Created: Tue May 25 14:18:14 PDT 2010
6 //
7 // (C) Copyright 2010, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #include "blobs.h"
21 #include "boxword.h"
22 #include "normalis.h"
23 #include "ocrblock.h"
24 #include "pageres.h"
25 
26 namespace tesseract {
27 
// Tolerance used when snapping an edge of an output box onto the matching
// edge of the input blob box: only edges that differ by no more than this
// are clipped. A larger difference suggests the blob was chopped, in which
// case only the word bounding box is used to limit the output box.
const int kBoxClipTolerance = 2;
32 
33 BoxWord::BoxWord() : length_(0) {
34 }
35 
37  CopyFrom(src);
38 }
39 
41 }
42 
44  CopyFrom(src);
45  return *this;
46 }
47 
48 void BoxWord::CopyFrom(const BoxWord& src) {
49  bbox_ = src.bbox_;
50  length_ = src.length_;
51  boxes_.clear();
52  boxes_.reserve(length_);
53  for (int i = 0; i < length_; ++i)
54  boxes_.push_back(src.boxes_[i]);
55 }
56 
57 // Factory to build a BoxWord from a TWERD using the DENORMs on each blob to
58 // switch back to original image coordinates.
60  BoxWord* boxword = new BoxWord();
61  // Count the blobs.
62  boxword->length_ = tessword->NumBlobs();
63  // Allocate memory.
64  boxword->boxes_.reserve(boxword->length_);
65 
66  for (int b = 0; b < boxword->length_; ++b) {
67  TBLOB* tblob = tessword->blobs[b];
68  TBOX blob_box;
69  for (TESSLINE* outline = tblob->outlines; outline != NULL;
70  outline = outline->next) {
71  EDGEPT* edgept = outline->loop;
72  // Iterate over the edges.
73  do {
74  if (!edgept->IsHidden() || !edgept->prev->IsHidden()) {
75  ICOORD pos(edgept->pos.x, edgept->pos.y);
76  TPOINT denormed;
77  tblob->denorm().DenormTransform(NULL, edgept->pos, &denormed);
78  pos.set_x(denormed.x);
79  pos.set_y(denormed.y);
80  TBOX pt_box(pos, pos);
81  blob_box += pt_box;
82  }
83  edgept = edgept->next;
84  } while (edgept != outline->loop);
85  }
86  boxword->boxes_.push_back(blob_box);
87  }
88  boxword->ComputeBoundingBox();
89  return boxword;
90 }
91 
92 // Clean up the bounding boxes from the polygonal approximation by
93 // expanding slightly, then clipping to the blobs from the original_word
94 // that overlap. If not null, the block provides the inverse rotation.
95 void BoxWord::ClipToOriginalWord(const BLOCK* block, WERD* original_word) {
96  for (int i = 0; i < length_; ++i) {
97  TBOX box = boxes_[i];
98  // Expand by a single pixel, as the poly approximation error is 1 pixel.
99  box = TBOX(box.left() - 1, box.bottom() - 1,
100  box.right() + 1, box.top() + 1);
101  // Now find the original box that matches.
102  TBOX original_box;
103  C_BLOB_IT b_it(original_word->cblob_list());
104  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
105  TBOX blob_box = b_it.data()->bounding_box();
106  if (block != NULL)
107  blob_box.rotate(block->re_rotation());
108  if (blob_box.major_overlap(box)) {
109  original_box += blob_box;
110  }
111  }
112  if (!original_box.null_box()) {
113  if (NearlyEqual<int>(original_box.left(), box.left(), kBoxClipTolerance))
114  box.set_left(original_box.left());
115  if (NearlyEqual<int>(original_box.right(), box.right(),
117  box.set_right(original_box.right());
118  if (NearlyEqual<int>(original_box.top(), box.top(), kBoxClipTolerance))
119  box.set_top(original_box.top());
120  if (NearlyEqual<int>(original_box.bottom(), box.bottom(),
122  box.set_bottom(original_box.bottom());
123  }
124  original_box = original_word->bounding_box();
125  if (block != NULL)
126  original_box.rotate(block->re_rotation());
127  boxes_[i] = box.intersection(original_box);
128  }
129  ComputeBoundingBox();
130 }
131 
132 // Merges the boxes from start to end, not including end, and deletes
133 // the boxes between start and end.
134 void BoxWord::MergeBoxes(int start, int end) {
135  start = ClipToRange(start, 0, length_);
136  end = ClipToRange(end, 0, length_);
137  if (end <= start + 1)
138  return;
139  for (int i = start + 1; i < end; ++i) {
140  boxes_[start] += boxes_[i];
141  }
142  int shrinkage = end - 1 - start;
143  length_ -= shrinkage;
144  for (int i = start + 1; i < length_; ++i)
145  boxes_[i] = boxes_[i + shrinkage];
146  boxes_.truncate(length_);
147 }
148 
149 // Inserts a new box before the given index.
150 // Recomputes the bounding box.
151 void BoxWord::InsertBox(int index, const TBOX& box) {
152  if (index < length_)
153  boxes_.insert(box, index);
154  else
155  boxes_.push_back(box);
156  length_ = boxes_.size();
157  ComputeBoundingBox();
158 }
159 
160 // Changes the box at the given index to the new box.
161 // Recomputes the bounding box.
162 void BoxWord::ChangeBox(int index, const TBOX& box) {
163  boxes_[index] = box;
164  ComputeBoundingBox();
165 }
166 
167 // Deletes the box with the given index, and shuffles up the rest.
168 // Recomputes the bounding box.
169 void BoxWord::DeleteBox(int index) {
170  ASSERT_HOST(0 <= index && index < length_);
171  boxes_.remove(index);
172  --length_;
173  ComputeBoundingBox();
174 }
175 
176 // Deletes all the boxes stored in BoxWord.
178  length_ = 0;
179  boxes_.clear();
180  bbox_ = TBOX();
181 }
182 
183 // Computes the bounding box of the word.
184 void BoxWord::ComputeBoundingBox() {
185  bbox_ = TBOX();
186  for (int i = 0; i < length_; ++i)
187  bbox_ += boxes_[i];
188 }
189 
190 // This and other putatively are the same, so call the (permanent) callback
191 // for each blob index where the bounding boxes match.
192 // The callback is deleted on completion.
194  TessCallback1<int>* cb) const {
195  for (int i = 0; i < length_ && i < other.NumBlobs(); ++i) {
196  TBOX blob_box = other.blobs[i]->bounding_box();
197  if (blob_box == boxes_[i])
198  cb->Run(i);
199  }
200  delete cb;
201 }
202 
203 } // namespace tesseract.
void set_bottom(int y)
Definition: rect.h:64
void rotate(const FCOORD &vec)
Definition: rect.h:189
Definition: blobs.h:76
virtual void Run(A1)=0
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
integer coordinate
Definition: points.h:30
void InsertBox(int index, const TBOX &box)
Definition: boxword.cpp:151
void insert(T t, int index)
Definition: blobs.h:50
void remove(int index)
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
Definition: helpers.h:115
void DenormTransform(const DENORM *last_denorm, const TPOINT &pt, TPOINT *original) const
Definition: normalis.cpp:389
void ClipToOriginalWord(const BLOCK *block, WERD *original_word)
Definition: boxword.cpp:95
inT16 y
Definition: blobs.h:72
int push_back(T object)
void set_left(int x)
Definition: rect.h:71
EDGEPT * next
Definition: blobs.h:169
inT16 bottom() const
Definition: rect.h:61
const int kBoxClipTolerance
Definition: boxword.cpp:31
FCOORD re_rotation() const
Definition: ocrblock.h:138
TPOINT pos
Definition: blobs.h:163
TESSLINE * next
Definition: blobs.h:258
Definition: werd.h:60
void ProcessMatchedBlobs(const TWERD &other, TessCallback1< int > *cb) const
Definition: boxword.cpp:193
inT16 left() const
Definition: rect.h:68
void DeleteAllBoxes()
Definition: boxword.cpp:177
const DENORM & denorm() const
Definition: blobs.h:340
Definition: blobs.h:395
void truncate(int size)
inT16 x
Definition: blobs.h:71
void MergeBoxes(int start, int end)
Definition: boxword.cpp:134
BoxWord & operator=(const BoxWord &src)
Definition: boxword.cpp:43
bool IsHidden() const
Definition: blobs.h:153
int NumBlobs() const
Definition: blobs.h:425
void set_right(int x)
Definition: rect.h:78
int size() const
Definition: genericvector.h:72
TESSLINE * outlines
Definition: blobs.h:377
Definition: ocrblock.h:30
void ChangeBox(int index, const TBOX &box)
Definition: boxword.cpp:162
Definition: blobs.h:261
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
inT16 top() const
Definition: rect.h:54
void set_top(int y)
Definition: rect.h:57
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
bool major_overlap(const TBOX &box) const
Definition: rect.h:358
EDGEPT * prev
Definition: blobs.h:170
void DeleteBox(int index)
Definition: boxword.cpp:169
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
static BoxWord * CopyFromNormalized(TWERD *tessword)
Definition: boxword.cpp:59
void reserve(int size)
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
void CopyFrom(const BoxWord &src)
Definition: boxword.cpp:48
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
#define ASSERT_HOST(x)
Definition: errcode.h:84
TBOX bounding_box() const
Definition: werd.cpp:160