tesseract  3.05.02
findseam.cpp
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: findseam.c (Formerly findseam.c)
5  * Description:
6  * Author: Mark Seaman, OCR Technology
7  * Created: Fri Oct 16 14:37:00 1987
8  * Modified: Tue Jul 30 15:44:59 1991 (Mark Seaman) marks@hpgrlt
9  * Language: C
10  * Package: N/A
11  * Status: Reusable Software Component
12  *
13  * (c) Copyright 1987, Hewlett-Packard Company.
14  ** Licensed under the Apache License, Version 2.0 (the "License");
15  ** you may not use this file except in compliance with the License.
16  ** You may obtain a copy of the License at
17  ** http://www.apache.org/licenses/LICENSE-2.0
18  ** Unless required by applicable law or agreed to in writing, software
19  ** distributed under the License is distributed on an "AS IS" BASIS,
20  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21  ** See the License for the specific language governing permissions and
22  ** limitations under the License.
23  *
24  *********************************************************************************/
25 /*----------------------------------------------------------------------
26  I n c l u d e s
27 ----------------------------------------------------------------------*/
28 #include "findseam.h"
29 #include "gradechop.h"
30 #include "plotedges.h"
31 #include "outlines.h"
32 #include "seam.h"
33 #include "wordrec.h"
34 
35 // Include automatically generated configuration file if running autoconf.
36 #ifdef HAVE_CONFIG_H
37 #include "config_auto.h"
38 #endif
39 
40 /*----------------------------------------------------------------------
41  T y p e s
42 ----------------------------------------------------------------------*/
43 #define SPLIT_CLOSENESS 20/* Difference in x value */
44  /* How many to keep */
45 #define MAX_NUM_SEAMS 150
46  /* How many to keep */
47 #define MAX_OLD_SEAMS 150
48 #define NO_FULL_PRIORITY -1/* Special marker for pri. */
49  /* Evalute right away */
50 #define BAD_PRIORITY 9999.0
51 
52 /*----------------------------------------------------------------------
53  F u n c t i o n s
54 ----------------------------------------------------------------------*/
55 namespace tesseract {
56 
57 /**********************************************************************
58  * add_seam_to_queue
59  *
60  * Adds the given new_seam to the seams priority queue, unless it is full
61  * and the new seam is worse than the worst.
62  **********************************************************************/
63 void Wordrec::add_seam_to_queue(float new_priority, SEAM *new_seam,
64  SeamQueue* seams) {
65  if (new_seam == NULL) return;
66  if (chop_debug) {
67  tprintf("Pushing new seam with priority %g :", new_priority);
68  new_seam->Print("seam: ");
69  }
70  if (seams->size() >= MAX_NUM_SEAMS) {
71  SeamPair old_pair(0, NULL);
72  if (seams->PopWorst(&old_pair) && old_pair.key() <= new_priority) {
73  if (chop_debug) {
74  tprintf("Old seam staying with priority %g\n", old_pair.key());
75  }
76  delete new_seam;
77  seams->Push(&old_pair);
78  return;
79  } else if (chop_debug) {
80  tprintf("New seam with priority %g beats old worst seam with %g\n",
81  new_priority, old_pair.key());
82  }
83  }
84  SeamPair new_pair(new_priority, new_seam);
85  seams->Push(&new_pair);
86 }
87 
88 
89 /**********************************************************************
90  * choose_best_seam
91  *
92  * Choose the best seam that can be created by assembling this a
93  * collection of splits. A queue of all the possible seams is
94  * maintained. Each new split received is placed in that queue with
95  * its partial priority value. These values in the seam queue are
96  * evaluated and combined until a good enough seam is found. If no
97  * further good seams are being found then this function returns to the
98  * caller, who will send more splits. If this function is called with
99  * a split of NULL, then no further splits can be supplied by the
100  * caller.
101  **********************************************************************/
102 void Wordrec::choose_best_seam(SeamQueue *seam_queue, const SPLIT *split,
103  PRIORITY priority, SEAM **seam_result,
104  TBLOB *blob, SeamPile *seam_pile) {
105  SEAM *seam;
106  char str[80];
107  float my_priority;
108  /* Add seam of split */
109  my_priority = priority;
110  if (split != NULL) {
111  TPOINT split_point = split->point1->pos;
112  split_point += split->point2->pos;
113  split_point /= 2;
114  seam = new SEAM(my_priority, split_point, *split);
115  if (chop_debug > 1) seam->Print("Partial priority ");
116  add_seam_to_queue(my_priority, seam, seam_queue);
117 
118  if (my_priority > chop_good_split)
119  return;
120  }
121 
122  TBOX bbox = blob->bounding_box();
123  /* Queue loop */
124  while (!seam_queue->empty()) {
125  SeamPair seam_pair;
126  seam_queue->Pop(&seam_pair);
127  seam = seam_pair.extract_data();
128  /* Set full priority */
129  my_priority = seam->FullPriority(bbox.left(), bbox.right(),
132  if (chop_debug) {
133  sprintf (str, "Full my_priority %0.0f, ", my_priority);
134  seam->Print(str);
135  }
136 
137  if ((*seam_result == NULL || (*seam_result)->priority() > my_priority) &&
138  my_priority < chop_ok_split) {
139  /* No crossing */
140  if (seam->IsHealthy(*blob, chop_min_outline_points,
142  delete *seam_result;
143  *seam_result = new SEAM(*seam);
144  (*seam_result)->set_priority(my_priority);
145  } else {
146  delete seam;
147  seam = NULL;
148  my_priority = BAD_PRIORITY;
149  }
150  }
151 
152  if (my_priority < chop_good_split) {
153  if (seam)
154  delete seam;
155  return; /* Made good answer */
156  }
157 
158  if (seam) {
159  /* Combine with others */
160  if (seam_pile->size() < chop_seam_pile_size) {
161  combine_seam(*seam_pile, seam, seam_queue);
162  SeamDecPair pair(seam_pair.key(), seam);
163  seam_pile->Push(&pair);
164  } else if (chop_new_seam_pile &&
165  seam_pile->size() == chop_seam_pile_size &&
166  seam_pile->PeekTop().key() > seam_pair.key()) {
167  combine_seam(*seam_pile, seam, seam_queue);
168  SeamDecPair pair;
169  seam_pile->Pop(&pair); // pop the worst.
170  // Replace the seam in pair (deleting the old one) with
171  // the new seam and score, then push back into the heap.
172  pair.set_key(seam_pair.key());
173  pair.set_data(seam);
174  seam_pile->Push(&pair);
175  } else {
176  delete seam;
177  }
178  }
179 
180  my_priority = seam_queue->empty() ? NO_FULL_PRIORITY
181  : seam_queue->PeekTop().key();
182  if ((my_priority > chop_ok_split) ||
183  (my_priority > chop_good_split && split))
184  return;
185  }
186 }
187 
188 
189 /**********************************************************************
190  * combine_seam
191  *
192  * Find other seams to combine with this one. The new seams that result
193  * from this union should be added to the seam queue. The return value
194  * tells whether or not any additional seams were added to the queue.
195  **********************************************************************/
196 void Wordrec::combine_seam(const SeamPile& seam_pile,
197  const SEAM* seam, SeamQueue* seam_queue) {
198  for (int x = 0; x < seam_pile.size(); ++x) {
199  const SEAM *this_one = seam_pile.get(x).data();
200  if (seam->CombineableWith(*this_one, SPLIT_CLOSENESS, chop_ok_split)) {
201  SEAM *new_one = new SEAM(*seam);
202  new_one->CombineWith(*this_one);
203  if (chop_debug > 1) new_one->Print("Combo priority ");
204  add_seam_to_queue(new_one->priority(), new_one, seam_queue);
205  }
206  }
207 }
208 
209 /**********************************************************************
210  * pick_good_seam
211  *
212  * Find and return a good seam that will split this blob into two pieces.
213  * Work from the outlines provided.
214  **********************************************************************/
216  SeamPile seam_pile(chop_seam_pile_size);
217  EDGEPT *points[MAX_NUM_POINTS];
218  EDGEPT_CLIST new_points;
219  SEAM *seam = NULL;
220  TESSLINE *outline;
221  inT16 num_points = 0;
222 
223 #ifndef GRAPHICS_DISABLED
224  if (chop_debug > 2)
225  wordrec_display_splits.set_value(true);
226 
227  draw_blob_edges(blob);
228 #endif
229 
230  PointHeap point_heap(MAX_NUM_POINTS);
231  for (outline = blob->outlines; outline; outline = outline->next)
232  prioritize_points(outline, &point_heap);
233 
234  while (!point_heap.empty() && num_points < MAX_NUM_POINTS) {
235  points[num_points++] = point_heap.PeekTop().data;
236  point_heap.Pop(NULL);
237  }
238 
239  /* Initialize queue */
240  SeamQueue seam_queue(MAX_NUM_SEAMS);
241 
242  try_point_pairs(points, num_points, &seam_queue, &seam_pile, &seam, blob);
243  try_vertical_splits(points, num_points, &new_points,
244  &seam_queue, &seam_pile, &seam, blob);
245 
246  if (seam == NULL) {
247  choose_best_seam(&seam_queue, NULL, BAD_PRIORITY, &seam, blob, &seam_pile);
248  } else if (seam->priority() > chop_good_split) {
249  choose_best_seam(&seam_queue, NULL, seam->priority(), &seam, blob,
250  &seam_pile);
251  }
252 
253  EDGEPT_C_IT it(&new_points);
254  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
255  EDGEPT *inserted_point = it.data();
256  if (seam == NULL || !seam->UsesPoint(inserted_point)) {
257  for (outline = blob->outlines; outline; outline = outline->next) {
258  if (outline->loop == inserted_point) {
259  outline->loop = outline->loop->next;
260  }
261  }
262  remove_edgept(inserted_point);
263  }
264  }
265 
266  if (seam) {
267  if (seam->priority() > chop_ok_split) {
268  delete seam;
269  seam = NULL;
270  }
271 #ifndef GRAPHICS_DISABLED
272  else if (wordrec_display_splits) {
273  seam->Mark(edge_window);
274  if (chop_debug > 2) {
277  }
278  }
279 #endif
280  }
281 
282  if (chop_debug)
283  wordrec_display_splits.set_value(false);
284 
285  return (seam);
286 }
287 
288 
289 /**********************************************************************
290  * try_point_pairs
291  *
292  * Try all the splits that are produced by pairing critical points
293  * together. See if any of them are suitable for use. Use a seam
294  * queue and seam pile that have already been initialized and used.
295  **********************************************************************/
297  inT16 num_points,
298  SeamQueue* seam_queue,
299  SeamPile* seam_pile,
300  SEAM ** seam,
301  TBLOB * blob) {
302  inT16 x;
303  inT16 y;
304  PRIORITY priority;
305 
306  for (x = 0; x < num_points; x++) {
307  for (y = x + 1; y < num_points; y++) {
308  if (points[y] &&
309  points[x]->WeightedDistance(*points[y], chop_x_y_weight) <
311  points[x] != points[y]->next && points[y] != points[x]->next &&
312  !is_exterior_point(points[x], points[y]) &&
313  !is_exterior_point(points[y], points[x])) {
314  SPLIT split(points[x], points[y]);
315  priority = partial_split_priority(&split);
316 
317  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
318  }
319  }
320  }
321 }
322 
323 
324 /**********************************************************************
325  * try_vertical_splits
326  *
327  * Try all the splits that are produced by vertical projection to see
328  * if any of them are suitable for use. Use a seam queue and seam pile
329  * that have already been initialized and used.
330  * Return in new_points a collection of points that were inserted into
331  * the blob while examining vertical splits and which may safely be
332  * removed once a seam is chosen if they are not part of the seam.
333  **********************************************************************/
335  inT16 num_points,
336  EDGEPT_CLIST *new_points,
337  SeamQueue* seam_queue,
338  SeamPile* seam_pile,
339  SEAM ** seam,
340  TBLOB * blob) {
341  EDGEPT *vertical_point = NULL;
342  inT16 x;
343  PRIORITY priority;
344  TESSLINE *outline;
345 
346  for (x = 0; x < num_points; x++) {
347  vertical_point = NULL;
348  for (outline = blob->outlines; outline; outline = outline->next) {
349  vertical_projection_point(points[x], outline->loop,
350  &vertical_point, new_points);
351  }
352 
353  if (vertical_point && points[x] != vertical_point->next &&
354  vertical_point != points[x]->next &&
355  points[x]->WeightedDistance(*vertical_point, chop_x_y_weight) <
357  SPLIT split(points[x], vertical_point);
358  priority = partial_split_priority(&split);
359  choose_best_seam(seam_queue, &split, priority, seam, blob, seam_pile);
360  }
361  }
362 }
363 
364 }
void remove_edgept(EDGEPT *point)
Definition: split.cpp:208
const Pair & get(int index) const
Definition: genericheap.h:87
bool chop_new_seam_pile
Definition: wordrec.h:146
int chop_centered_maxwidth
Definition: wordrec.h:153
EDGEPT * loop
Definition: blobs.h:257
void try_vertical_splits(EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:334
Definition: blobs.h:76
short inT16
Definition: host.h:33
float priority() const
Definition: seam.h:65
Data * extract_data()
Definition: kdpair.h:131
double chop_overlap_knob
Definition: wordrec.h:150
bool empty() const
Definition: genericheap.h:68
const Key & key() const
Definition: kdpair.h:116
#define update_edge_window()
Definition: plotedges.h:45
void set_data(Data *new_data)
Definition: kdpair.h:126
EDGEPT * point2
Definition: split.h:104
Definition: blobs.h:50
void combine_seam(const SeamPile &seam_pile, const SEAM *seam, SeamQueue *seam_queue)
Definition: findseam.cpp:196
void draw_blob_edges(TBLOB *blob)
Definition: plotedges.cpp:77
EDGEPT * point1
Definition: split.h:103
float PRIORITY
Definition: seam.h:42
float FullPriority(int xmin, int xmax, double overlap_knob, int centered_maxwidth, double center_knob, double width_change_knob) const
Definition: seam.cpp:245
#define is_exterior_point(edge, point)
Definition: outlines.h:97
int chop_min_outline_points
Definition: wordrec.h:144
void Push(Pair *entry)
Definition: genericheap.h:95
ScrollView * edge_window
Definition: plotedges.cpp:43
void choose_best_seam(SeamQueue *seam_queue, const SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob, SeamPile *seam_pile)
Definition: findseam.cpp:102
TBOX bounding_box() const
Definition: blobs.cpp:482
double chop_ok_split
Definition: wordrec.h:156
EDGEPT * next
Definition: blobs.h:169
#define partial_split_priority(split)
Definition: gradechop.h:46
double chop_width_change_knob
Definition: wordrec.h:155
int chop_seam_pile_size
Definition: wordrec.h:145
void prioritize_points(TESSLINE *outline, PointHeap *points)
Definition: chop.cpp:162
TPOINT pos
Definition: blobs.h:163
TESSLINE * next
Definition: blobs.h:258
int chop_min_outline_area
Definition: wordrec.h:148
double chop_good_split
Definition: wordrec.h:157
void try_point_pairs(EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SeamQueue *seam_queue, SeamPile *seam_pile, SEAM **seam, TBLOB *blob)
Definition: findseam.cpp:296
#define BAD_PRIORITY
Definition: findseam.cpp:50
inT16 left() const
Definition: rect.h:68
const Pair & PeekTop() const
Definition: genericheap.h:108
bool wordrec_display_splits
Definition: split.cpp:49
void CombineWith(const SEAM &other)
Definition: seam.cpp:60
#define SPLIT_CLOSENESS
Definition: findseam.cpp:43
#define tprintf(...)
Definition: tprintf.h:31
#define NO_FULL_PRIORITY
Definition: findseam.cpp:48
Definition: split.h:37
void add_seam_to_queue(float new_priority, SEAM *new_seam, SeamQueue *seams)
Definition: findseam.cpp:63
TESSLINE * outlines
Definition: blobs.h:377
bool Pop(Pair *entry)
Definition: genericheap.h:118
void Mark(ScrollView *window) const
Definition: seam.cpp:186
Definition: blobs.h:261
bool UsesPoint(const EDGEPT *point) const
Definition: seam.h:88
void vertical_projection_point(EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points)
Definition: chop.cpp:274
bool PopWorst(Pair *entry)
Definition: genericheap.h:140
bool CombineableWith(const SEAM &other, int max_x_dist, float max_total_priority) const
Definition: seam.cpp:46
Definition: seam.h:44
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void set_key(const Key &new_key)
Definition: kdpair.h:119
#define MAX_NUM_SEAMS
Definition: findseam.cpp:45
#define edge_window_wait()
Definition: plotedges.h:57
SEAM * pick_good_seam(TBLOB *blob)
Definition: findseam.cpp:215
void Print(const char *label) const
Definition: seam.cpp:160
int WeightedDistance(const EDGEPT &other, int x_factor) const
Definition: blobs.h:99
double chop_center_knob
Definition: wordrec.h:151
#define MAX_NUM_POINTS
Definition: chop.h:39
bool IsHealthy(const TBLOB &blob, int min_points, int min_area) const
Definition: seam.cpp:72