tesseract  3.05.02
seam.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: seam.c (Formerly seam.c)
5  * Description:
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Fri May 17 16:30:13 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "seam.h"
29 #include "blobs.h"
30 #include "tprintf.h"
31 
32 /*----------------------------------------------------------------------
33  Public Function Code
34 ----------------------------------------------------------------------*/
35 
36 // Returns the bounding box of all the points in the seam.
38  TBOX box(location_.x, location_.y, location_.x, location_.y);
39  for (int s = 0; s < num_splits_; ++s) {
40  box += splits_[s].bounding_box();
41  }
42  return box;
43 }
44 
45 // Returns true if other can be combined into *this.
46 bool SEAM::CombineableWith(const SEAM& other, int max_x_dist,
47  float max_total_priority) const {
48  int dist = location_.x - other.location_.x;
49  if (-max_x_dist < dist && dist < max_x_dist &&
50  num_splits_ + other.num_splits_ <= kMaxNumSplits &&
51  priority_ + other.priority_ < max_total_priority &&
52  !OverlappingSplits(other) && !SharesPosition(other)) {
53  return true;
54  } else {
55  return false;
56  }
57 }
58 
59 // Combines other into *this. Only works if CombinableWith returned true.
60 void SEAM::CombineWith(const SEAM& other) {
61  priority_ += other.priority_;
62  location_ += other.location_;
63  location_ /= 2;
64 
65  for (int s = 0; s < other.num_splits_ && num_splits_ < kMaxNumSplits; ++s)
66  splits_[num_splits_++] = other.splits_[s];
67 }
68 
69 // Returns true if the splits in *this SEAM appear OK in the sense that they
70 // do not cross any outlines and do not chop off any ridiculously small
71 // pieces.
72 bool SEAM::IsHealthy(const TBLOB& blob, int min_points, int min_area) const {
73  // TODO(rays) Try testing all the splits. Duplicating original code for now,
74  // which tested only the first.
75  return num_splits_ == 0 || splits_[0].IsHealthy(blob, min_points, min_area);
76 }
77 
78 // Computes the widthp_/widthn_ range for all existing SEAMs and for *this
79 // seam, which is about to be inserted at insert_index. Returns false if
80 // any of the computations fails, as this indicates an invalid chop.
81 // widthn_/widthp_ are only changed if modify is true.
83  const GenericVector<TBLOB*>& blobs,
84  int insert_index, bool modify) {
85  for (int s = 0; s < insert_index; ++s) {
86  if (!seams[s]->FindBlobWidth(blobs, s, modify)) return false;
87  }
88  if (!FindBlobWidth(blobs, insert_index, modify)) return false;
89  for (int s = insert_index; s < seams.size(); ++s) {
90  if (!seams[s]->FindBlobWidth(blobs, s + 1, modify)) return false;
91  }
92  return true;
93 }
94 
95 // Computes the widthp_/widthn_ range. Returns false if not all the splits
96 // are accounted for. widthn_/widthp_ are only changed if modify is true.
97 bool SEAM::FindBlobWidth(const GenericVector<TBLOB*>& blobs, int index,
98  bool modify) {
99  int num_found = 0;
100  if (modify) {
101  widthp_ = 0;
102  widthn_ = 0;
103  }
104  for (int s = 0; s < num_splits_; ++s) {
105  const SPLIT& split = splits_[s];
106  bool found_split = split.ContainedByBlob(*blobs[index]);
107  // Look right.
108  for (int b = index + 1; !found_split && b < blobs.size(); ++b) {
109  found_split = split.ContainedByBlob(*blobs[b]);
110  if (found_split && b - index > widthp_ && modify) widthp_ = b - index;
111  }
112  // Look left.
113  for (int b = index - 1; !found_split && b >= 0; --b) {
114  found_split = split.ContainedByBlob(*blobs[b]);
115  if (found_split && index - b > widthn_ && modify) widthn_ = index - b;
116  }
117  if (found_split) ++num_found;
118  }
119  return num_found == num_splits_;
120 }
121 
122 // Splits this blob into two blobs by applying the splits included in
123 // *this SEAM
124 void SEAM::ApplySeam(bool italic_blob, TBLOB* blob, TBLOB* other_blob) const {
125  for (int s = 0; s < num_splits_; ++s) {
126  splits_[s].SplitOutlineList(blob->outlines);
127  }
128  blob->ComputeBoundingBoxes();
129 
130  divide_blobs(blob, other_blob, italic_blob, location_);
131 
133  other_blob->EliminateDuplicateOutlines();
134 
135  blob->CorrectBlobOrder(other_blob);
136 }
137 
138 // Undoes ApplySeam by removing the seam between these two blobs.
139 // Produces one blob as a result, and deletes other_blob.
140 void SEAM::UndoSeam(TBLOB* blob, TBLOB* other_blob) const {
141  if (blob->outlines == NULL) {
142  blob->outlines = other_blob->outlines;
143  other_blob->outlines = NULL;
144  }
145 
146  TESSLINE* outline = blob->outlines;
147  while (outline->next) outline = outline->next;
148  outline->next = other_blob->outlines;
149  other_blob->outlines = NULL;
150  delete other_blob;
151 
152  for (int s = 0; s < num_splits_; ++s) {
153  splits_[s].UnsplitOutlineList(blob);
154  }
155  blob->ComputeBoundingBoxes();
157 }
158 
159 // Prints everything in *this SEAM.
160 void SEAM::Print(const char* label) const {
161  tprintf(label);
162  tprintf(" %6.2f @ (%d,%d), p=%d, n=%d ", priority_, location_.x, location_.y,
163  widthp_, widthn_);
164  for (int s = 0; s < num_splits_; ++s) {
165  splits_[s].Print();
166  if (s + 1 < num_splits_) tprintf(", ");
167  }
168  tprintf("\n");
169 }
170 
171 // Prints a collection of SEAMs.
172 /* static */
173 void SEAM::PrintSeams(const char* label, const GenericVector<SEAM*>& seams) {
174  if (!seams.empty()) {
175  tprintf("%s\n", label);
176  for (int x = 0; x < seams.size(); ++x) {
177  tprintf("%2d: ", x);
178  seams[x]->Print("");
179  }
180  tprintf("\n");
181  }
182 }
183 
184 #ifndef GRAPHICS_DISABLED
185 // Draws the seam in the given window.
186 void SEAM::Mark(ScrollView* window) const {
187  for (int s = 0; s < num_splits_; ++s) splits_[s].Mark(window);
188 }
189 #endif
190 
191 // Break up the blobs in this chain so that they are all independent.
192 // This operation should undo the affect of join_pieces.
193 /* static */
195  const GenericVector<TBLOB*>& blobs, int first,
196  int last) {
197  for (int x = first; x < last; ++x) seams[x]->Reveal();
198 
199  TESSLINE* outline = blobs[first]->outlines;
200  int next_blob = first + 1;
201 
202  while (outline != NULL && next_blob <= last) {
203  if (outline->next == blobs[next_blob]->outlines) {
204  outline->next = NULL;
205  outline = blobs[next_blob]->outlines;
206  ++next_blob;
207  } else {
208  outline = outline->next;
209  }
210  }
211 }
212 
213 // Join a group of base level pieces into a single blob that can then
214 // be classified.
215 /* static */
217  const GenericVector<TBLOB*>& blobs, int first, int last) {
218  TESSLINE* outline = blobs[first]->outlines;
219  if (!outline)
220  return;
221 
222  for (int x = first; x < last; ++x) {
223  SEAM *seam = seams[x];
224  if (x - seam->widthn_ >= first && x + seam->widthp_ < last) seam->Hide();
225  while (outline->next) outline = outline->next;
226  outline->next = blobs[x + 1]->outlines;
227  }
228 }
229 
230 // Hides the seam so the outlines appear not to be cut by it.
231 void SEAM::Hide() const {
232  for (int s = 0; s < num_splits_; ++s) {
233  splits_[s].Hide();
234  }
235 }
236 
237 // Undoes hide, so the outlines are cut by the seam.
238 void SEAM::Reveal() const {
239  for (int s = 0; s < num_splits_; ++s) {
240  splits_[s].Reveal();
241  }
242 }
243 
244 // Computes and returns, but does not set, the full priority of *this SEAM.
245 float SEAM::FullPriority(int xmin, int xmax, double overlap_knob,
246  int centered_maxwidth, double center_knob,
247  double width_change_knob) const {
248  if (num_splits_ == 0) return 0.0f;
249  for (int s = 1; s < num_splits_; ++s) {
250  splits_[s].SplitOutline();
251  }
252  float full_priority =
253  priority_ +
254  splits_[0].FullPriority(xmin, xmax, overlap_knob, centered_maxwidth,
255  center_knob, width_change_knob);
256  for (int s = num_splits_ - 1; s >= 1; --s) {
257  splits_[s].UnsplitOutlines();
258  }
259  return full_priority;
260 }
261 
269 void start_seam_list(TWERD* word, GenericVector<SEAM*>* seam_array) {
270  seam_array->truncate(0);
271  TPOINT location;
272 
273  for (int b = 1; b < word->NumBlobs(); ++b) {
274  TBOX bbox = word->blobs[b - 1]->bounding_box();
275  TBOX nbox = word->blobs[b]->bounding_box();
276  location.x = (bbox.right() + nbox.left()) / 2;
277  location.y = (bbox.bottom() + bbox.top() + nbox.bottom() + nbox.top()) / 4;
278  seam_array->push_back(new SEAM(0.0f, location));
279  }
280 }
TBOX bounding_box() const
Definition: seam.cpp:37
void UndoSeam(TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:140
void start_seam_list(TWERD *word, GenericVector< SEAM *> *seam_array)
Definition: seam.cpp:269
void SplitOutline() const
Definition: split.cpp:262
GenericVector< TBLOB * > blobs
Definition: blobs.h:436
static void BreakPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:194
void UnsplitOutlines() const
Definition: split.cpp:303
bool OverlappingSplits(const SEAM &other) const
Definition: seam.h:103
bool SharesPosition(const SEAM &other) const
Definition: seam.h:95
Definition: blobs.h:50
static void PrintSeams(const char *label, const GenericVector< SEAM *> &seams)
Definition: seam.cpp:173
static void JoinPieces(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int first, int last)
Definition: seam.cpp:216
void EliminateDuplicateOutlines()
Definition: blobs.cpp:495
LIST last(LIST var_list)
Definition: oldlist.cpp:271
void divide_blobs(TBLOB *blob, TBLOB *other_blob, bool italic_blob, const TPOINT &location)
Definition: blobs.cpp:983
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:245
inT16 y
Definition: blobs.h:72
void SplitOutlineList(TESSLINE *outlines) const
Definition: split.cpp:243
TBOX bounding_box() const
Definition: split.cpp:52
int push_back(T object)
inT16 bottom() const
Definition: rect.h:61
TESSLINE * next
Definition: blobs.h:258
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: split.cpp:89
void UnsplitOutlineList(TBLOB *blob) const
Definition: split.cpp:287
inT16 left() const
Definition: rect.h:68
void CombineWith(const SEAM &other)
Definition: seam.cpp:60
void Hide() const
Definition: seam.cpp:231
Definition: blobs.h:395
void truncate(int size)
void ComputeBoundingBoxes()
Definition: blobs.cpp:462
void Hide() const
Definition: split.cpp:59
inT16 x
Definition: blobs.h:71
bool PrepareToInsertSeam(const GenericVector< SEAM *> &seams, const GenericVector< TBLOB *> &blobs, int insert_index, bool modify)
Definition: seam.cpp:82
int NumBlobs() const
Definition: blobs.h:425
#define tprintf(...)
Definition: tprintf.h:31
Definition: split.h:37
void Reveal() const
Definition: split.cpp:73
int size() const
Definition: genericvector.h:72
TESSLINE * outlines
Definition: blobs.h:377
void Reveal() const
Definition: seam.cpp:238
void CorrectBlobOrder(TBLOB *next)
Definition: blobs.cpp:515
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: split.cpp:123
void Mark(ScrollView *window) const
Definition: seam.cpp:186
Definition: blobs.h:261
void ApplySeam(bool italic_blob, TBLOB *blob, TBLOB *other_blob) const
Definition: seam.cpp:124
inT16 top() const
Definition: rect.h:54
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:46
Definition: seam.h:44
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void Print() const
Definition: split.cpp:227
bool empty() const
Definition: genericvector.h:84
void Print(const char *label) const
Definition: seam.cpp:160
bool ContainedByBlob(const TBLOB &blob) const
Definition: split.h:65
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:72
bool FindBlobWidth(const GenericVector< TBLOB *> &blobs, int index, bool modify)
Definition: seam.cpp:97