tesseract  3.05.02
pithsync.h File Reference
#include "blobbox.h"
#include "params.h"
#include "statistc.h"

Go to the source code of this file.

Classes

class  FPCUTPT
 

Functions

double check_pitch_sync2 (BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, inT16 projection_left, inT16 projection_right, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
 
double check_pitch_sync3 (inT16 projection_left, inT16 projection_right, inT16 zero_count, inT16 pitch, inT16 pitch_error, STATS *projection, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
 

Function Documentation

◆ check_pitch_sync2()

double check_pitch_sync2 ( BLOBNBOX_IT *  blob_it,
inT16  blob_count,
inT16  pitch,
inT16  pitch_error,
STATS *  projection,
inT16  projection_left,
inT16  projection_right,
float  projection_scale,
inT16 &  occupation_count,
FPSEGPT_LIST *  seg_list,
inT16  start,
inT16  end 
)

Definition at line 298 of file pithsync.cpp.

311  {
312  BOOL8 faking; //illegal cut pt
313  BOOL8 mid_cut; //cheap cut pt.
314  inT16 x; //current coord
315  inT16 blob_index; //blob number
316  inT16 left_edge; //of word
317  inT16 right_edge; //of word
318  inT16 array_origin; //x coord of array
319  inT16 offset; //dist to legal area
320  inT16 zero_count; //projection zero
321  inT16 best_left_x = 0; //for equals
322  inT16 best_right_x = 0; //right edge
323  TBOX this_box; //bounding box
324  TBOX next_box; //box of next blob
325  FPSEGPT *segpt; //segment point
326  FPCUTPT *cutpts; //array of points
327  double best_cost; //best path
328  double mean_sum; //computes result
329  FPCUTPT *best_end; //end of best path
330  inT16 best_fake; //best fake level
331  inT16 best_count; //no of cuts
332  BLOBNBOX_IT this_it; //copy iterator
333  FPSEGPT_IT seg_it = seg_list; //output iterator
334 
335  // tprintf("Computing sync on word of %d blobs with pitch %d\n",
336  // blob_count, pitch);
337  // if (blob_count==8 && pitch==27)
338  // projection->print(stdout,TRUE);
339  zero_count = 0;
340  if (pitch < 3)
341  pitch = 3; //nothing ludicrous
342  if ((pitch - 3) / 2 < pitch_error)
343  pitch_error = (pitch - 3) / 2;
344  this_it = *blob_it;
345  this_box = box_next (&this_it);//get box
346  // left_edge=this_box.left(); //left of word
347  // right_edge=this_box.right();
348  // for (blob_index=1;blob_index<blob_count;blob_index++)
349  // {
350  // this_box=box_next(&this_it);
351  // if (this_box.right()>right_edge)
352  // right_edge=this_box.right();
353  // }
354  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
355  && left_edge < projection_right; left_edge++);
356  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
357  && right_edge > left_edge; right_edge--);
358  ASSERT_HOST (right_edge >= left_edge);
359  if (pitsync_linear_version >= 4)
360  return check_pitch_sync3 (projection_left, projection_right, zero_count,
361  pitch, pitch_error, projection,
362  projection_scale, occupation_count, seg_list,
363  start, end);
364  array_origin = left_edge - pitch;
365  cutpts = (FPCUTPT *) alloc_mem ((right_edge - left_edge + pitch * 2 + 1)
366  * sizeof (FPCUTPT));
367  for (x = array_origin; x < left_edge; x++)
368  //free cuts
369  cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, 0);
370  for (offset = 0; offset <= pitch_error; offset++, x++)
371  //not quite free
372  cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, offset);
373 
374  this_it = *blob_it;
375  best_cost = MAX_FLOAT32;
376  best_end = NULL;
377  this_box = box_next (&this_it);//first box
378  next_box = box_next (&this_it);//second box
379  blob_index = 1;
380  while (x < right_edge - pitch_error) {
381  if (x > this_box.right () + pitch_error && blob_index < blob_count) {
382  this_box = next_box;
383  next_box = box_next (&this_it);
384  blob_index++;
385  }
386  faking = FALSE;
387  mid_cut = FALSE;
388  if (x <= this_box.left ())
389  offset = 0;
390  else if (x <= this_box.left () + pitch_error)
391  offset = x - this_box.left ();
392  else if (x >= this_box.right ())
393  offset = 0;
394  else if (x >= next_box.left () && blob_index < blob_count) {
395  offset = x - next_box.left ();
396  if (this_box.right () - x < offset)
397  offset = this_box.right () - x;
398  }
399  else if (x >= this_box.right () - pitch_error)
400  offset = this_box.right () - x;
401  else if (x - this_box.left () > pitch * pitsync_joined_edge
402  && this_box.right () - x > pitch * pitsync_joined_edge) {
403  mid_cut = TRUE;
404  offset = 0;
405  }
406  else {
407  faking = TRUE;
408  offset = projection->pile_count (x);
409  }
410  cutpts[x - array_origin].assign (cutpts, array_origin, x,
411  faking, mid_cut, offset, projection,
412  projection_scale, zero_count, pitch,
413  pitch_error);
414  x++;
415  }
416 
417  best_fake = MAX_INT16;
418  best_cost = MAX_INT32;
419  best_count = MAX_INT16;
420  while (x < right_edge + pitch) {
421  offset = x < right_edge ? right_edge - x : 0;
422  cutpts[x - array_origin].assign (cutpts, array_origin, x,
423  FALSE, FALSE, offset, projection,
424  projection_scale, zero_count, pitch,
425  pitch_error);
426  cutpts[x - array_origin].terminal = TRUE;
427  if (cutpts[x - array_origin].index () +
428  cutpts[x - array_origin].fake_count <= best_count + best_fake) {
429  if (cutpts[x - array_origin].fake_count < best_fake
430  || (cutpts[x - array_origin].fake_count == best_fake
431  && cutpts[x - array_origin].cost_function () < best_cost)) {
432  best_fake = cutpts[x - array_origin].fake_count;
433  best_cost = cutpts[x - array_origin].cost_function ();
434  best_left_x = x;
435  best_right_x = x;
436  best_count = cutpts[x - array_origin].index ();
437  }
438  else if (cutpts[x - array_origin].fake_count == best_fake
439  && x == best_right_x + 1
440  && cutpts[x - array_origin].cost_function () == best_cost) {
441  //exactly equal
442  best_right_x = x;
443  }
444  }
445  x++;
446  }
447  ASSERT_HOST (best_fake < MAX_INT16);
448 
449  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
450  if (this_box.right () == textord_test_x
451  && this_box.top () == textord_test_y) {
452  for (x = left_edge - pitch; x < right_edge + pitch; x++) {
453  tprintf ("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
454  x, cutpts[x - array_origin].cost_function (),
455  cutpts[x - array_origin].sum (),
456  cutpts[x - array_origin].squares (),
457  cutpts[x - array_origin].previous ()->position ());
458  }
459  }
460  occupation_count = -1;
461  do {
462  for (x = best_end->position () - pitch + pitch_error;
463  x < best_end->position () - pitch_error
464  && projection->pile_count (x) == 0; x++);
465  if (x < best_end->position () - pitch_error)
466  occupation_count++;
467  //copy it
468  segpt = new FPSEGPT (best_end);
469  seg_it.add_before_then_move (segpt);
470  best_end = best_end->previous ();
471  }
472  while (best_end != NULL);
473  seg_it.move_to_last ();
474  mean_sum = seg_it.data ()->sum ();
475  mean_sum = mean_sum * mean_sum / best_count;
476  if (seg_it.data ()->squares () - mean_sum < 0)
477  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
478  seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
479  free_mem(cutpts);
480  // tprintf("blob_count=%d, pitch=%d, sync=%g, occ=%d\n",
481  // blob_count,pitch,seg_it.data()->squares()-mean_sum,
482  // occupation_count);
483  return seg_it.data ()->squares () - mean_sum;
484 }
#define TRUE
Definition: capi.h:45
short inT16
Definition: host.h:33
int textord_test_x
Definition: makerow.cpp:62
unsigned char BOOL8
Definition: host.h:46
int textord_test_y
Definition: makerow.cpp:63
EXTERN double pitsync_joined_edge
Definition: pitsync1.cpp:33
#define MAX_FLOAT32
Definition: host.h:57
inT16 index() const
Definition: pithsync.h:87
#define MAX_INT16
Definition: host.h:52
#define FALSE
Definition: capi.h:46
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
FPCUTPT * previous()
Definition: pithsync.h:81
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:631
inT16 left() const
Definition: rect.h:68
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_INT32
Definition: host.h:53
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
inT16 top() const
Definition: rect.h:54
inT32 position()
Definition: pithsync.h:69
double cost_function()
Definition: pithsync.h:72
double sum()
Definition: pithsync.h:78
TBOX
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
BOOL8 terminal
Definition: pithsync.h:92
void free_mem(void *oldchunk)
Definition: memry.cpp:55
inT16 fake_count
Definition: pithsync.h:93
double check_pitch_sync3(inT16 projection_left, inT16 projection_right, inT16 zero_count, inT16 pitch, inT16 pitch_error, STATS *projection, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
Definition: pithsync.cpp:495
void assign(FPCUTPT cutpts[], inT16 array_origin, inT16 x, BOOL8 faking, BOOL8 mid_cut, inT16 offset, STATS *projection, float projection_scale, inT16 zero_count, inT16 pitch, inT16 pitch_error)
Definition: pithsync.cpp:98
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ check_pitch_sync3()

double check_pitch_sync3 ( inT16  projection_left,
inT16  projection_right,
inT16  zero_count,
inT16  pitch,
inT16  pitch_error,
STATS *  projection,
float  projection_scale,
inT16 &  occupation_count,
FPSEGPT_LIST *  seg_list,
inT16  start,
inT16  end 
)

Definition at line 495 of file pithsync.cpp.

507  {
508  BOOL8 faking; //illegal cut pt
509  BOOL8 mid_cut; //cheap cut pt.
510  inT16 left_edge; //of word
511  inT16 right_edge; //of word
512  inT16 x; //current coord
513  inT16 array_origin; //x coord of array
514  inT16 offset; //dist to legal area
515  inT16 projection_offset; //from scaled projection
516  inT16 prev_zero; //previous zero dist
517  inT16 next_zero; //next zero dist
518  inT16 zero_offset; //scan window
519  inT16 best_left_x = 0; //for equals
520  inT16 best_right_x = 0; //right edge
521  FPSEGPT *segpt; //segment point
522  FPCUTPT *cutpts; //array of points
523  BOOL8 *mins; //local min results
524  int minindex; //next input position
525  int test_index; //index to mins
526  double best_cost; //best path
527  double mean_sum; //computes result
528  FPCUTPT *best_end; //end of best path
529  inT16 best_fake; //best fake level
530  inT16 best_count; //no of cuts
531  FPSEGPT_IT seg_it = seg_list; //output iterator
532 
533  end = (end - start) % pitch;
534  if (pitch < 3)
535  pitch = 3; //nothing ludicrous
536  if ((pitch - 3) / 2 < pitch_error)
537  pitch_error = (pitch - 3) / 2;
538  //min dist of zero
539  zero_offset = (inT16) (pitch * pitsync_joined_edge);
540  for (left_edge = projection_left; projection->pile_count (left_edge) == 0
541  && left_edge < projection_right; left_edge++);
542  for (right_edge = projection_right; projection->pile_count (right_edge) == 0
543  && right_edge > left_edge; right_edge--);
544  array_origin = left_edge - pitch;
545  cutpts = (FPCUTPT *) alloc_mem ((right_edge - left_edge + pitch * 2 + 1)
546  * sizeof (FPCUTPT));
547  mins = (BOOL8 *) alloc_mem ((pitch_error * 2 + 1) * sizeof (BOOL8));
548  for (x = array_origin; x < left_edge; x++)
549  //free cuts
550  cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, 0);
551  prev_zero = left_edge - 1;
552  for (offset = 0; offset <= pitch_error; offset++, x++)
553  //not quite free
554  cutpts[x - array_origin].setup (cutpts, array_origin, projection, zero_count, pitch, x, offset);
555 
556  best_cost = MAX_FLOAT32;
557  best_end = NULL;
558  for (offset = -pitch_error, minindex = 0; offset < pitch_error;
559  offset++, minindex++)
560  mins[minindex] = projection->local_min (x + offset);
561  next_zero = x + zero_offset + 1;
562  for (offset = next_zero - 1; offset >= x; offset--) {
563  if (projection->pile_count (offset) <= zero_count) {
564  next_zero = offset;
565  break;
566  }
567  }
568  while (x < right_edge - pitch_error) {
569  mins[minindex] = projection->local_min (x + pitch_error);
570  minindex++;
571  if (minindex > pitch_error * 2)
572  minindex = 0;
573  faking = FALSE;
574  mid_cut = FALSE;
575  offset = 0;
576  if (projection->pile_count (x) <= zero_count) {
577  prev_zero = x;
578  }
579  else {
580  for (offset = 1; offset <= pitch_error; offset++)
581  if (projection->pile_count (x + offset) <= zero_count
582  || projection->pile_count (x - offset) <= zero_count)
583  break;
584  }
585  if (offset > pitch_error) {
586  if (x - prev_zero > zero_offset && next_zero - x > zero_offset) {
587  for (offset = 0; offset <= pitch_error; offset++) {
588  test_index = minindex + pitch_error + offset;
589  if (test_index > pitch_error * 2)
590  test_index -= pitch_error * 2 + 1;
591  if (mins[test_index])
592  break;
593  test_index = minindex + pitch_error - offset;
594  if (test_index > pitch_error * 2)
595  test_index -= pitch_error * 2 + 1;
596  if (mins[test_index])
597  break;
598  }
599  }
600  if (offset > pitch_error) {
601  offset = projection->pile_count (x);
602  faking = TRUE;
603  }
604  else {
605  projection_offset =
606  (inT16) (projection->pile_count (x) / projection_scale);
607  if (projection_offset > offset)
608  offset = projection_offset;
609  mid_cut = TRUE;
610  }
611  }
612  if ((start == 0 && end == 0)
613  || !textord_fast_pitch_test
614  || (x - projection_left - start) % pitch <= end)
615  cutpts[x - array_origin].assign (cutpts, array_origin, x,
616  faking, mid_cut, offset, projection,
617  projection_scale, zero_count, pitch,
618  pitch_error);
619  else
620  cutpts[x - array_origin].assign_cheap (cutpts, array_origin, x,
621  faking, mid_cut, offset,
622  projection, projection_scale,
623  zero_count, pitch,
624  pitch_error);
625  x++;
626  if (next_zero < x || next_zero == x + zero_offset)
627  next_zero = x + zero_offset + 1;
628  if (projection->pile_count (x + zero_offset) <= zero_count)
629  next_zero = x + zero_offset;
630  }
631 
632  best_fake = MAX_INT16;
633  best_cost = MAX_INT32;
634  best_count = MAX_INT16;
635  while (x < right_edge + pitch) {
636  offset = x < right_edge ? right_edge - x : 0;
637  cutpts[x - array_origin].assign (cutpts, array_origin, x,
638  FALSE, FALSE, offset, projection,
639  projection_scale, zero_count, pitch,
640  pitch_error);
641  cutpts[x - array_origin].terminal = TRUE;
642  if (cutpts[x - array_origin].index () +
643  cutpts[x - array_origin].fake_count <= best_count + best_fake) {
644  if (cutpts[x - array_origin].fake_count < best_fake
645  || (cutpts[x - array_origin].fake_count == best_fake
646  && cutpts[x - array_origin].cost_function () < best_cost)) {
647  best_fake = cutpts[x - array_origin].fake_count;
648  best_cost = cutpts[x - array_origin].cost_function ();
649  best_left_x = x;
650  best_right_x = x;
651  best_count = cutpts[x - array_origin].index ();
652  }
653  else if (cutpts[x - array_origin].fake_count == best_fake
654  && x == best_right_x + 1
655  && cutpts[x - array_origin].cost_function () == best_cost) {
656  //exactly equal
657  best_right_x = x;
658  }
659  }
660  x++;
661  }
662  ASSERT_HOST (best_fake < MAX_INT16);
663 
664  best_end = &cutpts[(best_left_x + best_right_x) / 2 - array_origin];
665  // for (x=left_edge-pitch;x<right_edge+pitch;x++)
666  // {
667  // tprintf("x=%d, C=%g, s=%g, sq=%g, prev=%d\n",
668  // x,cutpts[x-array_origin].cost_function(),
669  // cutpts[x-array_origin].sum(),
670  // cutpts[x-array_origin].squares(),
671  // cutpts[x-array_origin].previous()->position());
672  // }
673  occupation_count = -1;
674  do {
675  for (x = best_end->position () - pitch + pitch_error;
676  x < best_end->position () - pitch_error
677  && projection->pile_count (x) == 0; x++);
678  if (x < best_end->position () - pitch_error)
679  occupation_count++;
680  //copy it
681  segpt = new FPSEGPT (best_end);
682  seg_it.add_before_then_move (segpt);
683  best_end = best_end->previous ();
684  }
685  while (best_end != NULL);
686  seg_it.move_to_last ();
687  mean_sum = seg_it.data ()->sum ();
688  mean_sum = mean_sum * mean_sum / best_count;
689  if (seg_it.data ()->squares () - mean_sum < 0)
690  tprintf ("Impossible sqsum=%g, mean=%g, total=%d\n",
691  seg_it.data ()->squares (), seg_it.data ()->sum (), best_count);
692  free_mem(mins);
693  free_mem(cutpts);
694  return seg_it.data ()->squares () - mean_sum;
695 }
#define TRUE
Definition: capi.h:45
short inT16
Definition: host.h:33
bool local_min(inT32 x) const
Definition: statistc.cpp:262
void assign_cheap(FPCUTPT cutpts[], inT16 array_origin, inT16 x, BOOL8 faking, BOOL8 mid_cut, inT16 offset, STATS *projection, float projection_scale, inT16 zero_count, inT16 pitch, inT16 pitch_error)
Definition: pithsync.cpp:206
unsigned char BOOL8
Definition: host.h:46
EXTERN double pitsync_joined_edge
Definition: pitsync1.cpp:33
#define MAX_FLOAT32
Definition: host.h:57
inT16 index() const
Definition: pithsync.h:87
#define MAX_INT16
Definition: host.h:52
#define FALSE
Definition: capi.h:46
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
FPCUTPT * previous()
Definition: pithsync.h:81
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_INT32
Definition: host.h:53
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
inT32 position()
Definition: pithsync.h:69
double cost_function()
Definition: pithsync.h:72
double sum()
Definition: pithsync.h:78
BOOL8 terminal
Definition: pithsync.h:92
void free_mem(void *oldchunk)
Definition: memry.cpp:55
inT16 fake_count
Definition: pithsync.h:93
void assign(FPCUTPT cutpts[], inT16 array_origin, inT16 x, BOOL8 faking, BOOL8 mid_cut, inT16 offset, STATS *projection, float projection_scale, inT16 zero_count, inT16 pitch, inT16 pitch_error)
Definition: pithsync.cpp:98
#define ASSERT_HOST(x)
Definition: errcode.h:84
EXTERN bool textord_fast_pitch_test
Definition: topitch.cpp:48