tesseract  3.05.02
pitsync1.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: pitsync1.cpp (Formerly pitsync.c)
3  * Description: Code to find the optimum fixed pitch segmentation of some blobs.
4  * Author: Ray Smith
5  * Created: Thu Nov 19 11:48:05 GMT 1992
6  *
7  * (C) Copyright 1992, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifdef __UNIX__
21 #include <assert.h>
22 #endif
23 #include <math.h>
24 #include "memry.h"
25 #include "pitsync1.h"
26 
27 ELISTIZE (FPSEGPT) CLISTIZE (FPSEGPT_LIST)
28 #define EXTERN
29 EXTERN
30 INT_VAR (pitsync_linear_version, 6, "Use new fast algorithm");
31 EXTERN
33 "Dist inside big blob for chopping");
34 EXTERN
36 "Fraction of cut for free cuts");
37 EXTERN
38 INT_VAR (pitsync_fake_depth, 1, "Max advance fake generation");
39 
40 /**********************************************************************
41  * FPSEGPT::FPSEGPT
42  *
43  * Constructor to make a new FPSEGPT.
44  * The existing FPCUTPT is duplicated.
45  **********************************************************************/
46 
47 FPSEGPT::FPSEGPT( //constructor
48  FPCUTPT *cutpt //create from new form
49  ) {
50  pred = NULL;
51  mean_sum = cutpt->sum ();
52  sq_sum = cutpt->squares ();
53  cost = cutpt->cost_function ();
54  faked = cutpt->faked;
55  terminal = cutpt->terminal;
56  fake_count = cutpt->fake_count;
57  xpos = cutpt->position ();
58  mid_cuts = cutpt->cheap_cuts ();
59 }
60 
61 
62 /**********************************************************************
63  * FPSEGPT::FPSEGPT
64  *
65  * Constructor to make a new FPSEGPT.
66  **********************************************************************/
67 
68 FPSEGPT::FPSEGPT ( //constructor
69 inT16 x //position
70 ):xpos (x) {
71  pred = NULL;
72  mean_sum = 0;
73  sq_sum = 0;
74  cost = 0;
75  faked = FALSE;
76  terminal = FALSE;
77  fake_count = 0;
78  mid_cuts = 0;
79 }
80 
81 
82 /**********************************************************************
83  * FPSEGPT::FPSEGPT
84  *
85  * Constructor to make a new FPSEGPT.
86  **********************************************************************/
87 
88 FPSEGPT::FPSEGPT ( //constructor
89 inT16 x, //position
90 BOOL8 faking, //faking this one
91 inT16 offset, //dist to gap
92 inT16 region_index, //segment number
93 inT16 pitch, //proposed pitch
94 inT16 pitch_error, //allowed tolerance
95 FPSEGPT_LIST * prev_list //previous segment
96 ):xpos (x) {
97  inT16 best_fake; //on previous
98  FPSEGPT *segpt; //segment point
99  inT32 dist; //from prev segment
100  double sq_dist; //squared distance
101  double mean; //mean pitch
102  double total; //total dists
103  double factor; //cost function
104  FPSEGPT_IT pred_it = prev_list;//for previuos segment
105 
106  cost = MAX_FLOAT32;
107  pred = NULL;
108  faked = faking;
109  terminal = FALSE;
110  best_fake = MAX_INT16;
111  mid_cuts = 0;
112  for (pred_it.mark_cycle_pt (); !pred_it.cycled_list (); pred_it.forward ()) {
113  segpt = pred_it.data ();
114  if (segpt->fake_count < best_fake)
115  best_fake = segpt->fake_count;
116  dist = x - segpt->xpos;
117  if (dist >= pitch - pitch_error && dist <= pitch + pitch_error
118  && !segpt->terminal) {
119  total = segpt->mean_sum + dist;
120  sq_dist = dist * dist + segpt->sq_sum + offset * offset;
121  //sum of squarees
122  mean = total / region_index;
123  factor = mean - pitch;
124  factor *= factor;
125  factor += sq_dist / (region_index) - mean * mean;
126  if (factor < cost) {
127  cost = factor; //find least cost
128  pred = segpt; //save path
129  mean_sum = total;
130  sq_sum = sq_dist;
131  fake_count = segpt->fake_count + faked;
132  }
133  }
134  }
135  if (fake_count > best_fake + 1)
136  pred = NULL; //fail it
137 }
138 
139 
140 /**********************************************************************
141  * check_pitch_sync
142  *
143  * Construct the lattice of possible segmentation points and choose the
144  * optimal path. Return the optimal path only.
145  * The return value is a measure of goodness of the sync.
146  **********************************************************************/
147 
148 double check_pitch_sync( //find segmentation
149  BLOBNBOX_IT *blob_it, //blobs to do
150  inT16 blob_count, //no of blobs
151  inT16 pitch, //pitch estimate
152  inT16 pitch_error, //tolerance
153  STATS *projection, //vertical
154  FPSEGPT_LIST *seg_list //output list
155  ) {
156  inT16 x; //current coord
157  inT16 min_index; //blob number
158  inT16 max_index; //blob number
159  inT16 left_edge; //of word
160  inT16 right_edge; //of word
161  inT16 right_max; //max allowed x
162  inT16 min_x; //in this region
163  inT16 max_x;
164  inT16 region_index;
165  inT16 best_region_index = 0; //for best result
166  inT16 offset; //dist to legal area
167  inT16 left_best_x; //edge of good region
168  inT16 right_best_x; //right edge
169  TBOX min_box; //bounding box
170  TBOX max_box; //bounding box
171  TBOX next_box; //box of next blob
172  FPSEGPT *segpt; //segment point
173  FPSEGPT_LIST *segpts; //points in a segment
174  double best_cost; //best path
175  double mean_sum; //computes result
176  FPSEGPT *best_end; //end of best path
177  BLOBNBOX_IT min_it; //copy iterator
178  BLOBNBOX_IT max_it; //copy iterator
179  FPSEGPT_IT segpt_it; //iterator
180  //output segments
181  FPSEGPT_IT outseg_it = seg_list;
182  FPSEGPT_LIST_CLIST lattice; //list of lists
183  //region iterator
184  FPSEGPT_LIST_C_IT lattice_it = &lattice;
185 
186  // tprintf("Computing sync on word of %d blobs with pitch %d\n",
187  // blob_count, pitch);
188  // if (blob_count==8 && pitch==27)
189  // projection->print(stdout,TRUE);
190  if (pitch < 3)
191  pitch = 3; //nothing ludicrous
192  if ((pitch - 3) / 2 < pitch_error)
193  pitch_error = (pitch - 3) / 2;
194  min_it = *blob_it;
195  min_box = box_next (&min_it); //get box
196  // if (blob_count==8 && pitch==27)
197  // tprintf("1st box at (%d,%d)->(%d,%d)\n",
198  // min_box.left(),min_box.bottom(),
199  // min_box.right(),min_box.top());
200  //left of word
201  left_edge = min_box.left () + pitch_error;
202  for (min_index = 1; min_index < blob_count; min_index++) {
203  min_box = box_next (&min_it);
204  // if (blob_count==8 && pitch==27)
205  // tprintf("Box at (%d,%d)->(%d,%d)\n",
206  // min_box.left(),min_box.bottom(),
207  // min_box.right(),min_box.top());
208  }
209  right_edge = min_box.right (); //end of word
210  max_x = left_edge;
211  //min permissible
212  min_x = max_x - pitch + pitch_error * 2 + 1;
213  right_max = right_edge + pitch - pitch_error - 1;
214  segpts = new FPSEGPT_LIST; //list of points
215  segpt_it.set_to_list (segpts);
216  for (x = min_x; x <= max_x; x++) {
217  segpt = new FPSEGPT (x); //make a new one
218  //put in list
219  segpt_it.add_after_then_move (segpt);
220  }
221  //first segment
222  lattice_it.add_before_then_move (segpts);
223  min_index = 0;
224  region_index = 1;
225  best_cost = MAX_FLOAT32;
226  best_end = NULL;
227  min_it = *blob_it;
228  min_box = box_next (&min_it); //first box
229  do {
230  left_best_x = -1;
231  right_best_x = -1;
232  segpts = new FPSEGPT_LIST; //list of points
233  segpt_it.set_to_list (segpts);
234  min_x += pitch - pitch_error;//next limits
235  max_x += pitch + pitch_error;
236  while (min_box.right () < min_x && min_index < blob_count) {
237  min_index++;
238  min_box = box_next (&min_it);
239  }
240  max_it = min_it;
241  max_index = min_index;
242  max_box = min_box;
243  next_box = box_next (&max_it);
244  for (x = min_x; x <= max_x && x <= right_max; x++) {
245  while (x < right_edge && max_index < blob_count
246  && x > max_box.right ()) {
247  max_index++;
248  max_box = next_box;
249  next_box = box_next (&max_it);
250  }
251  if (x <= max_box.left () + pitch_error
252  || x >= max_box.right () - pitch_error || x >= right_edge
253  || (max_index < blob_count - 1 && x >= next_box.left ())
254  || (x - max_box.left () > pitch * pitsync_joined_edge
255  && max_box.right () - x > pitch * pitsync_joined_edge)) {
256  // || projection->local_min(x))
257  if (x - max_box.left () > 0
258  && x - max_box.left () <= pitch_error)
259  //dist to real break
260  offset = x - max_box.left ();
261  else if (max_box.right () - x > 0
262  && max_box.right () - x <= pitch_error
263  && (max_index >= blob_count - 1
264  || x < next_box.left ()))
265  offset = max_box.right () - x;
266  else
267  offset = 0;
268  // offset=pitsync_offset_freecut_fraction*projection->pile_count(x);
269  segpt = new FPSEGPT (x, FALSE, offset, region_index,
270  pitch, pitch_error, lattice_it.data ());
271  }
272  else {
273  offset = projection->pile_count (x);
274  segpt = new FPSEGPT (x, TRUE, offset, region_index,
275  pitch, pitch_error, lattice_it.data ());
276  }
277  if (segpt->previous () != NULL) {
278  segpt_it.add_after_then_move (segpt);
279  if (x >= right_edge - pitch_error) {
280  segpt->terminal = TRUE;//no more wanted
281  if (segpt->cost_function () < best_cost) {
282  best_cost = segpt->cost_function ();
283  //find least
284  best_end = segpt;
285  best_region_index = region_index;
286  left_best_x = x;
287  right_best_x = x;
288  }
289  else if (segpt->cost_function () == best_cost
290  && right_best_x == x - 1)
291  right_best_x = x;
292  }
293  }
294  else {
295  delete segpt; //no good
296  }
297  }
298  if (segpts->empty ()) {
299  if (best_end != NULL)
300  break; //already found one
301  make_illegal_segment (lattice_it.data (), min_box, min_it,
302  region_index, pitch, pitch_error, segpts);
303  }
304  else {
305  if (right_best_x > left_best_x + 1) {
306  left_best_x = (left_best_x + right_best_x + 1) / 2;
307  for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
308  && segpt_it.data ()->position () != left_best_x;
309  segpt_it.forward ());
310  if (segpt_it.data ()->position () == left_best_x)
311  //middle of region
312  best_end = segpt_it.data ();
313  }
314  }
315  //new segment
316  lattice_it.add_before_then_move (segpts);
317  region_index++;
318  }
319  while (min_x < right_edge);
320  ASSERT_HOST (best_end != NULL);//must always find some
321 
322  for (lattice_it.mark_cycle_pt (); !lattice_it.cycled_list ();
323  lattice_it.forward ()) {
324  segpts = lattice_it.data ();
325  segpt_it.set_to_list (segpts);
326  // if (blob_count==8 && pitch==27)
327  // {
328  // for (segpt_it.mark_cycle_pt();!segpt_it.cycled_list();segpt_it.forward())
329  // {
330  // segpt=segpt_it.data();
331  // tprintf("At %d, (%x) cost=%g, m=%g, sq=%g, pred=%x\n",
332  // segpt->position(),segpt,segpt->cost_function(),
333  // segpt->sum(),segpt->squares(),segpt->previous());
334  // }
335  // tprintf("\n");
336  // }
337  for (segpt_it.mark_cycle_pt (); !segpt_it.cycled_list ()
338  && segpt_it.data () != best_end; segpt_it.forward ());
339  if (segpt_it.data () == best_end) {
340  //save good one
341  segpt = segpt_it.extract ();
342  outseg_it.add_before_then_move (segpt);
343  best_end = segpt->previous ();
344  }
345  }
346  ASSERT_HOST (best_end == NULL);
347  ASSERT_HOST (!outseg_it.empty ());
348  outseg_it.move_to_last ();
349  mean_sum = outseg_it.data ()->sum ();
350  mean_sum = mean_sum * mean_sum / best_region_index;
351  if (outseg_it.data ()->squares () - mean_sum < 0)
352  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
353  outseg_it.data ()->squares (), outseg_it.data ()->sum (),
354  best_region_index);
355  lattice.deep_clear (); //shift the lot
356  return outseg_it.data ()->squares () - mean_sum;
357 }
358 
359 
360 /**********************************************************************
361  * make_illegal_segment
362  *
363  * Make a fake set of chop points due to having no legal places.
364  **********************************************************************/
365 
366 void make_illegal_segment( //find segmentation
367  FPSEGPT_LIST *prev_list, //previous segments
368  TBOX blob_box, //bounding box
369  BLOBNBOX_IT blob_it, //iterator
370  inT16 region_index, //number of segment
371  inT16 pitch, //pitch estimate
372  inT16 pitch_error, //tolerance
373  FPSEGPT_LIST *seg_list //output list
374  ) {
375  inT16 x; //current coord
376  inT16 min_x = 0; //in this region
377  inT16 max_x = 0;
378  inT16 offset; //dist to edge
379  FPSEGPT *segpt; //segment point
380  FPSEGPT *prevpt; //previous point
381  float best_cost; //best path
382  FPSEGPT_IT segpt_it = seg_list;//iterator
383  //previous points
384  FPSEGPT_IT prevpt_it = prev_list;
385 
386  best_cost = MAX_FLOAT32;
387  for (prevpt_it.mark_cycle_pt (); !prevpt_it.cycled_list ();
388  prevpt_it.forward ()) {
389  prevpt = prevpt_it.data ();
390  if (prevpt->cost_function () < best_cost) {
391  //find least
392  best_cost = prevpt->cost_function ();
393  min_x = prevpt->position ();
394  max_x = min_x; //limits on coords
395  }
396  else if (prevpt->cost_function () == best_cost) {
397  max_x = prevpt->position ();
398  }
399  }
400  min_x += pitch - pitch_error;
401  max_x += pitch + pitch_error;
402  for (x = min_x; x <= max_x; x++) {
403  while (x > blob_box.right ()) {
404  blob_box = box_next (&blob_it);
405  }
406  offset = x - blob_box.left ();
407  if (blob_box.right () - x < offset)
408  offset = blob_box.right () - x;
409  segpt = new FPSEGPT (x, FALSE, offset,
410  region_index, pitch, pitch_error, prev_list);
411  if (segpt->previous () != NULL) {
412  ASSERT_HOST (offset >= 0);
413  fprintf (stderr, "made fake at %d\n", x);
414  //make one up
415  segpt_it.add_after_then_move (segpt);
416  segpt->faked = TRUE;
417  segpt->fake_count++;
418  }
419  else
420  delete segpt;
421  }
422 }
EXTERN int pitsync_fake_depth
Definition: pitsync1.cpp:38
inT16 cheap_cuts() const
Definition: pithsync.h:84
#define TRUE
Definition: capi.h:45
#define EXTERN
short inT16
Definition: host.h:33
double check_pitch_sync(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:148
double cost_function()
Definition: pitsync1.h:52
void make_illegal_segment(FPSEGPT_LIST *prev_list, TBOX blob_box, BLOBNBOX_IT blob_it, inT16 region_index, inT16 pitch, inT16 pitch_error, FPSEGPT_LIST *seg_list)
Definition: pitsync1.cpp:366
unsigned char BOOL8
Definition: host.h:46
EXTERN double pitsync_joined_edge
Definition: pitsync1.cpp:33
EXTERN double pitsync_offset_freecut_fraction
Definition: pitsync1.cpp:36
#define MAX_FLOAT32
Definition: host.h:57
double squares()
Definition: pithsync.h:75
ELISTIZE(FPSEGPT) CLISTIZE(FPSEGPT_LIST) EXTERN int pitsync_linear_version
#define MAX_INT16
Definition: host.h:52
inT32 position()
Definition: pitsync1.h:49
BOOL8 terminal
Definition: pitsync1.h:70
#define FALSE
Definition: capi.h:46
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:631
inT16 left() const
Definition: rect.h:68
BOOL8 faked
Definition: pitsync1.h:69
int inT32
Definition: host.h:35
#define tprintf(...)
Definition: tprintf.h:31
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
inT32 position()
Definition: pithsync.h:69
double cost_function()
Definition: pithsync.h:72
double sum()
Definition: pithsync.h:78
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
BOOL8 terminal
Definition: pithsync.h:92
#define double_VAR(name, val, comment)
Definition: params.h:286
inT16 fake_count
Definition: pithsync.h:93
inT16 fake_count
Definition: pitsync1.h:71
#define CLISTIZE(CLASSNAME)
Definition: clst.h:913
FPSEGPT * previous()
Definition: pitsync1.h:61
Definition: statistc.h:33
#define ASSERT_HOST(x)
Definition: errcode.h:84
BOOL8 faked
Definition: pithsync.h:91
#define INT_VAR(name, val, comment)
Definition: params.h:277
FPSEGPT()
Definition: pitsync1.h:35