44 "Debug on fixed pitch test");
46 "Turn off dp fixed pitch algorithm");
48 "Do even faster pitch algorithm");
50 "Write full metric stuff");
54 "Use correct answer for fixed/prop");
56 "Attempt whole doc/block fixed pitch");
59 "Ding rate for unbalanced char cells");
61 #define FIXED_WIDTH_MULTIPLE 5 62 #define BLOCK_STATS_CLUSTERS 10 63 #define MAX_ALLOWED_PITCH 100 //max pixel pitch. 74 TO_BLOCK_LIST *port_blocks,
85 #ifndef GRAPHICS_DISABLED 92 block_it.set_to_list (port_blocks);
94 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
95 block_it.forward ()) {
96 block = block_it.data ();
103 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
104 block_it.forward ()) {
105 block = block_it.data ();
113 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
114 block_it.forward()) {
115 block = block_it.data ();
117 if (pb != NULL && !pb->
IsText())
continue;
118 row_it.set_to_list (block->
get_rows ());
120 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
121 row = row_it.data ();
122 fix_row_pitch(row, block, port_blocks, row_index, block_index);
127 #ifndef GRAPHICS_DISABLED 144 TO_BLOCK_LIST *blocks,
146 inT32 block_target) {
154 TO_BLOCK_IT block_it = blocks;
162 block_votes = like_votes = other_votes = 0;
169 for (block_it.mark_cycle_pt(); !block_it.cycled_list();
170 block_it.forward()) {
171 block = block_it.data();
173 if (pb != NULL && !pb->
IsText())
continue;
175 row_it.set_to_list (block->
get_rows ());
176 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
178 row = row_it.data ();
192 if (block_index == block_target) {
247 else if (block_votes <= textord_words_veto_power && like_votes > 0) {
253 if (block_votes == 0 && like_votes == 0 && other_votes > 0
256 (
"Warning:row %d of block %d set prop with no like rows against trend\n",
257 row_target, block_target);
261 tprintf(
":b_votes=%d:l_votes=%d:o_votes=%d",
262 block_votes, like_votes, other_votes);
269 else if (block_votes == 0 && like_votes > 0)
273 (
"Warning:guessing pitch as xheight on row %d, block %d\n",
274 row_target, block_target);
316 tprintf (
"Block %d at (%d,%d)->(%d,%d)\n",
319 block_box.
right (), block_box.
top ());
330 if (!block->
get_rows ()->empty ()) {
333 #ifndef GRAPHICS_DISABLED 360 TO_ROW_IT row_it = block->
get_rows ();
363 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
364 row = row_it.data ();
371 row_index, testing_on)) {
397 TO_BLOCK_LIST *port_blocks,
407 TO_BLOCK_IT block_it = port_blocks;
411 inT16 projection_left;
412 inT16 projection_right;
415 ICOORDELT_LIST *master_cells;
428 if (block_it.empty ()
432 shift_factor = gradient / (gradient * gradient + 1);
433 row_it.set_to_list (block_it.data ()->get_rows ());
434 master_x = row_it.data ()->projection_left;
435 master_y = row_it.data ()->baseline.y (master_x);
442 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
443 block_it.forward ()) {
444 block = block_it.data ();
445 row_it.set_to_list (block->
get_rows ());
446 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
447 row = row_it.data ();
455 shift_factor * (master_y - row_y));
458 shift_factor * (master_y - row_y));
459 if (row_left < projection_left)
460 projection_left = row_left;
461 if (row_right > projection_right)
462 projection_right = row_right;
467 projection.
set_range (projection_left, projection_right);
469 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
470 block_it.forward ()) {
471 block = block_it.data ();
472 row_it.set_to_list (block->
get_rows ());
473 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
474 row = row_it.data ();
478 shift_factor * (master_y - row_y));
486 row_it.set_to_list (block_it.data ()->get_rows ());
487 row = row_it.data ();
488 #ifndef GRAPHICS_DISABLED 493 final_pitch = pitches.
ile (0.5);
494 pitch = (
inT16) final_pitch;
496 tune_row_pitch (row, &projection, projection_left, projection_right,
497 pitch * 0.75, final_pitch, sp_sd, mid_cuts,
502 (
"try_doc:props=%d:fixed=%d:pitch=%d:final_pitch=%g:pitch_sd=%g:sp_sd=%g:sd/trc=%g:sd/p=%g:sd/trc/p=%g\n",
503 prop_blocks, fixed_blocks, pitch, final_pitch, pitch_sd, sp_sd,
504 pitch_sd / total_row_count, pitch_sd / pitch,
505 pitch_sd / total_row_count / pitch);
507 #ifndef GRAPHICS_DISABLED 510 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
511 block_it.forward ()) {
512 block = block_it.data ();
513 row_it.set_to_list (block->
get_rows ());
514 for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
516 row = row_it.data ();
518 row_shift = shift_factor * (master_y - row_y);
558 inT32 maybe_fixed = 0;
559 inT32 maybe_prop = 0;
561 inT32 corr_fixed = 0;
564 TO_ROW_IT row_it = block->
get_rows ();
567 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
568 row = row_it.data ();
599 else if (def_fixed > 0 || def_prop > 0)
623 inT32 maybe_fixed = 0;
624 inT32 maybe_prop = 0;
626 inT32 corr_fixed = 0;
637 tprintf (
"Block %d has (%d,%d,%d)",
638 block_index, def_fixed, maybe_fixed, corr_fixed);
641 tprintf (
" fixed, (%d,%d,%d)", def_prop, maybe_prop, corr_prop);
644 tprintf (
" prop, %d dunno\n", dunno);
664 TO_ROW_IT row_it = block->
get_rows ();
666 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
667 row = row_it.data ();
718 STATS gap_stats (0, maxwidth);
724 if (!blob_it.empty ()) {
725 prev_x = blob_it.data ()->bounding_box ().right ();
727 while (!blob_it.at_first ()) {
728 blob = blob_it.data ();
731 if (blob_box.
left () - prev_x < maxwidth)
732 gap_stats.
add (blob_box.
left () - prev_x, 1);
733 prev_x = blob_box.
right ();
744 gap_stats.
smooth (smooth_factor);
746 prev_count = cluster_count;
747 cluster_count = gap_stats.
cluster (lower, upper,
752 if (cluster_count < 1) {
755 for (gap_index = 0; gap_index < cluster_count; gap_index++)
756 gaps[gap_index] = cluster_stats[gap_index + 1].ile (0.5);
759 tprintf (
"cluster_count=%d:", cluster_count);
760 for (gap_index = 0; gap_index < cluster_count; gap_index++)
761 tprintf (
" %g(%d)", gaps[gap_index],
762 cluster_stats[gap_index + 1].get_total ());
765 qsort (gaps, cluster_count,
sizeof (
float),
sort_floats);
770 for (gap_index = 0; gap_index < cluster_count
771 && gaps[gap_index] < lower; gap_index++);
772 if (gap_index == 0) {
774 tprintf (
"No clusters below nonspace threshold!!\n");
775 if (cluster_count > 1) {
785 row->
pr_nonsp = gaps[gap_index - 1];
786 while (gap_index < cluster_count && gaps[gap_index] < upper)
788 if (gap_index == cluster_count) {
790 tprintf (
"No clusters above nonspace threshold!!\n");
799 for (gap_index = 0; gap_index < cluster_count
800 && gaps[gap_index] < upper; gap_index++);
801 if (gap_index == 0) {
803 tprintf (
"No clusters below space threshold!!\n");
808 row->
fp_nonsp = gaps[gap_index - 1];
809 if (gap_index == cluster_count) {
811 tprintf (
"No clusters above space threshold!!\n");
819 (
"Initial estimates:pr_nonsp=%g, pr_space=%g, fp_nonsp=%g, fp_space=%g\n",
853 STATS gap_stats (0, maxwidth);
855 STATS pitch_stats (0, maxwidth);
862 if (non_space > initial_pitch)
863 non_space = initial_pitch;
864 min_space = (initial_pitch + non_space) / 2;
867 initial_pitch, min_space,
TRUE,
FALSE, dm_gap)) {
869 dm_pitch_iqr = maxwidth * 2.0f;
870 dm_pitch = initial_pitch;
873 dm_gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
874 dm_pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
875 dm_pitch = pitch_stats.
ile (0.5);
878 pitch_stats.
clear ();
880 initial_pitch, min_space,
TRUE,
FALSE, 0)) {
882 pitch_iqr = maxwidth * 3.0f;
885 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
886 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
889 (
"First fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
890 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile (0.5));
891 initial_pitch = pitch_stats.
ile (0.5);
892 if (min_space > initial_pitch
894 initial_pitch, initial_pitch,
TRUE,
FALSE, 0)) {
895 min_space = initial_pitch;
896 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
897 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
900 (
"Revised fp iteration:initial_pitch=%g, gap_iqr=%g, pitch_iqr=%g, pitch=%g\n",
901 initial_pitch, gap_iqr, pitch_iqr, pitch_stats.
ile (0.5));
902 initial_pitch = pitch_stats.
ile (0.5);
906 tprintf(
"Blk=%d:Row=%d:%c:p_iqr=%g:g_iqr=%g:dm_p_iqr=%g:dm_g_iqr=%g:%c:",
907 block_index, row_index,
'X',
908 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr,
909 pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth ?
'D' :
910 (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr ?
'S' :
'M'));
911 if (pitch_iqr > maxwidth && dm_pitch_iqr > maxwidth) {
917 if (pitch_iqr * dm_gap_iqr <= dm_pitch_iqr * gap_iqr) {
920 (
"Choosing non dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
921 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
922 gap_iqr = gap_stats.
ile (0.75) - gap_stats.
ile (0.25);
923 pitch_iqr = pitch_stats.
ile (0.75) - pitch_stats.
ile (0.25);
924 pitch = pitch_stats.
ile (0.5);
925 used_dm_model =
FALSE;
930 (
"Choosing dm version:pitch_iqr=%g, gap_iqr=%g, dm_pitch_iqr=%g, dm_gap_iqr=%g\n",
931 pitch_iqr, gap_iqr, dm_pitch_iqr, dm_gap_iqr);
932 gap_iqr = dm_gap_iqr;
933 pitch_iqr = dm_pitch_iqr;
935 used_dm_model =
TRUE;
938 tprintf (
"rev_p_iqr=%g:rev_g_iqr=%g:pitch=%g:",
939 pitch_iqr, gap_iqr, pitch);
940 tprintf (
"p_iqr/g=%g:p_iqr/x=%g:iqr_res=%c:",
941 pitch_iqr / gap_iqr, pitch_iqr / block->
xheight,
978 const char *res_string;
999 if (pitch_sd < textord_words_pitchsd_threshold * row->fixed_pitch
1000 && ((pitsync_linear_version & 3) < 3
1001 || ((pitsync_linear_version & 3) >= 3 && (row->
used_dm_model 1003 || (pitch_sd == 0 && sp_sd > 10))))) {
1004 if (pitch_sd < textord_words_def_fixed * row->fixed_pitch
1006 && ((pitsync_linear_version & 3) < 3 || sp_sd > 20))
1011 else if ((pitsync_linear_version & 3) < 3
1015 if (pitch_sd < textord_words_def_prop * row->fixed_pitch)
1042 tprintf (
":sd/p=%g:occ=%g:init_res=%s\n",
1062 float initial_pitch,
1064 BOOL8 ignore_outsize,
1065 BOOL8 split_outsize,
1071 BLOBNBOX_IT blob_it = row->
blob_list ();
1081 gap_stats->
clear ();
1082 pitch_stats->
clear ();
1083 if (blob_it.empty ())
1088 joined_box = blob_it.data ()->bounding_box ();
1091 blob = blob_it.data ();
1094 if ((blob_box.
left () - joined_box.
right () < dm_gap
1095 && !blob_it.at_first ())
1096 || blob->
cblob() == NULL)
1097 joined_box += blob_box;
1099 blob_width = joined_box.
width ();
1100 if (split_outsize) {
1102 (
inT32) floor ((
float) blob_width / initial_pitch + 0.5);
1103 if (width_units < 1)
1107 else if (ignore_outsize) {
1108 width = (float) blob_width / initial_pitch;
1116 width_units * initial_pitch) / 2);
1117 if (prev_valid && width_units >= 0) {
1123 gap_stats->
add (joined_box.
left () - prev_right, 1);
1124 pitch_stats->
add (x_centre - prev_centre, 1);
1126 prev_centre = (
inT32) (x_centre + width_units * initial_pitch);
1127 prev_right = joined_box.
right ();
1128 prev_valid = blob_box.
left () - joined_box.
right () < min_space;
1129 prev_valid = prev_valid && width_units >= 0;
1130 joined_box = blob_box;
1134 while (!blob_it.at_first ());
1149 inT16 projection_left,
1150 inT16 projection_right,
1152 float &initial_pitch,
1154 inT16 &best_mid_cuts,
1155 ICOORDELT_LIST *best_cells,
1165 ICOORDELT_LIST test_cells;
1166 ICOORDELT_IT best_it;
1170 projection_right, space_size, initial_pitch,
1173 best_mid_cuts, best_cells, testing_on);
1175 best_sp_sd = initial_pitch;
1176 return initial_pitch;
1189 best_sd = initial_sd;
1190 best_pitch = initial_pitch;
1192 tprintf (
"tune_row_pitch:start pitch=%g, sd=%g\n", best_pitch, best_sd);
1196 space_size, initial_pitch + pitch_delta, sp_sd,
1197 mid_cuts, &test_cells, testing_on);
1199 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch + pitch_delta,
1201 if (pitch_sd < best_sd) {
1203 best_mid_cuts = mid_cuts;
1205 best_pitch = initial_pitch + pitch_delta;
1206 best_cells->clear ();
1207 best_it.set_to_list (best_cells);
1208 best_it.add_list_after (&test_cells);
1211 test_cells.clear ();
1212 if (pitch_sd > initial_sd)
1218 space_size, initial_pitch - pitch_delta, sp_sd,
1219 mid_cuts, &test_cells, testing_on);
1221 tprintf (
"testing pitch at %g, sd=%g\n", initial_pitch - pitch_delta,
1223 if (pitch_sd < best_sd) {
1225 best_mid_cuts = mid_cuts;
1227 best_pitch = initial_pitch - pitch_delta;
1228 best_cells->clear ();
1229 best_it.set_to_list (best_cells);
1230 best_it.add_list_after (&test_cells);
1233 test_cells.clear ();
1234 if (pitch_sd > initial_sd)
1237 initial_pitch = best_pitch;
1261 inT16 projection_left,
1262 inT16 projection_right,
1264 float &initial_pitch,
1266 inT16 &best_mid_cuts,
1267 ICOORDELT_LIST *best_cells,
1281 best_sp_sd = initial_pitch;
1283 best_pitch =
static_cast<int>(initial_pitch);
1285 return initial_pitch;
1294 for (pixel = projection_left; pixel <= projection_right; pixel++) {
1298 (pixel - projection_left) % (best_pitch + pitch_delta),
1307 for (pixel = 0; pixel < best_pitch + pitch_delta; pixel++) {
1313 best_delta = pitch_delta;
1319 tprintf (
"tune_row_pitch:start pitch=%g, best_delta=%d, count=%d\n",
1320 initial_pitch, best_delta, best_count);
1321 best_pitch += best_delta;
1322 initial_pitch = best_pitch;
1324 best_count += best_count;
1325 for (start = best_pixel - 2; start > best_pixel - best_pitch
1327 best_delta].
pile_count (start % best_pitch) <= best_count;
1329 for (end = best_pixel + 2;
1330 end < best_pixel + best_pitch
1332 best_delta].
pile_count (end % best_pitch) <= best_count;
1349 tprintf (
"tune_row_pitch:output pitch=%g, sd=%g\n", initial_pitch,
1376 inT16 projection_left,
1377 inT16 projection_right,
1379 float initial_pitch,
1382 ICOORDELT_LIST *row_cells,
1389 BLOBNBOX_IT blob_it = row->
blob_list ();
1390 BLOBNBOX_IT start_it;
1391 BLOBNBOX_IT plot_it;
1398 FPSEGPT_LIST seg_list;
1403 ICOORDELT_IT cell_it = row_cells;
1411 if ((pitsync_linear_version & 3) > 1) {
1413 projection_right, initial_pitch,
1414 occupation, mid_cuts, row_cells,
1415 testing_on, start, end);
1426 if (blob_it.empty ())
1427 return space_size * 10;
1428 #ifndef GRAPHICS_DISABLED 1429 if (testing_on &&
to_win != NULL) {
1430 blob_box = blob_it.data ()->bounding_box ();
1438 blob_it.mark_cycle_pt ();
1440 for (; blob_count > 0; blob_count--)
1443 prev_box = blob_box;
1447 while (!blob_it.cycled_list ()
1448 && blob_box.
left () - prev_box.
right () < space_size);
1450 if (pitsync_linear_version & 3)
1453 projection, projection_left, projection_right,
1455 occupation, &seg_list, start, end);
1459 projection, &seg_list);
1461 tprintf (
"Word ending at (%d,%d), len=%d, sync rating=%g, ",
1462 prev_box.
right (), prev_box.
top (),
1463 seg_list.length () - 1, word_sync);
1464 seg_it.set_to_list (&seg_list);
1465 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list ();
1466 seg_it.forward ()) {
1467 if (seg_it.data ()->faked)
1469 tprintf (
"%d, ", seg_it.data ()->position ());
1477 #ifndef GRAPHICS_DISABLED 1481 seg_it.set_to_list (&seg_list);
1482 if (prev_right >= 0) {
1483 sp_var = seg_it.data ()->position () - prev_right;
1484 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1489 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1490 segpos = seg_it.data ()->position ();
1491 if (cell_it.empty () || segpos > cellpos + initial_pitch / 2) {
1493 while (!cell_it.empty () && segpos > cellpos + initial_pitch * 3 / 2) {
1495 cell_it.add_after_then_move (cell);
1496 cellpos += (
inT16) initial_pitch;
1500 cell_it.add_after_then_move (cell);
1503 else if (segpos > cellpos - initial_pitch / 2) {
1504 cell = cell_it.data ();
1506 cell->
set_x ((cellpos + segpos) / 2);
1507 cellpos = cell->
x ();
1510 seg_it.move_to_last ();
1511 prev_right = seg_it.data ()->position ();
1513 scale_factor = (seg_list.length () - 2) / 2;
1514 if (scale_factor < 1)
1519 sqsum += word_sync * scale_factor;
1520 total_count += (seg_list.length () - 1) * scale_factor;
1523 while (!blob_it.cycled_list ());
1524 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1525 return total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1539 inT16 projection_left,
1540 inT16 projection_right,
1541 float initial_pitch,
1544 ICOORDELT_LIST *row_cells,
1550 BLOBNBOX_IT blob_it = row->
blob_list ();
1551 BLOBNBOX_IT plot_it;
1554 FPSEGPT_LIST seg_list;
1558 ICOORDELT_IT cell_it = row_cells;
1563 if (blob_it.empty ()) {
1565 return initial_pitch * 10;
1567 #ifndef GRAPHICS_DISABLED 1568 if (testing_on &&
to_win != NULL) {
1574 blob_it.mark_cycle_pt ();
1580 while (!blob_it.cycled_list ());
1583 2, projection, projection_left,
1586 occupation, &seg_list, start, end);
1588 tprintf (
"Row ending at (%d,%d), len=%d, sync rating=%g, ",
1589 blob_box.
right (), blob_box.
top (),
1590 seg_list.length () - 1, word_sync);
1591 seg_it.set_to_list (&seg_list);
1592 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1593 if (seg_it.data ()->faked)
1595 tprintf (
"%d, ", seg_it.data ()->position ());
1603 #ifndef GRAPHICS_DISABLED 1607 seg_it.set_to_list (&seg_list);
1608 for (seg_it.mark_cycle_pt (); !seg_it.cycled_list (); seg_it.forward ()) {
1609 segpos = seg_it.data ()->position ();
1612 cell_it.add_after_then_move (cell);
1613 if (seg_it.at_last ())
1614 mid_cuts = seg_it.data ()->cheap_cuts ();
1617 return occupation > 0 ? sqrt (word_sync / occupation) : initial_pitch * 10;
1631 inT16 projection_left,
1632 inT16 projection_right,
1640 BLOBNBOX_IT blob_it = row->
blob_list ();
1641 BLOBNBOX_IT start_it;
1642 BLOBNBOX_IT row_start;
1644 inT16 total_blob_count;
1650 FPSEGPT_LIST seg_list;
1658 if (blob_it.empty ())
1660 row_start = blob_it;
1661 total_blob_count = 0;
1668 blob_it = row_start;
1672 blob_it.mark_cycle_pt ();
1674 for (; blob_count > 0; blob_count--)
1677 prev_box = blob_box;
1681 while (!blob_it.cycled_list ()
1682 && blob_box.
left () - prev_box.
right () < space_size);
1685 projection, projection_left, projection_right,
1687 occupation, &seg_list, 0, 0);
1688 total_blob_count += blob_count;
1689 seg_it.set_to_list (&seg_list);
1690 if (prev_right >= 0) {
1691 sp_var = seg_it.data ()->position () - prev_right;
1692 sp_var -= floor (sp_var / initial_pitch + 0.5) * initial_pitch;
1697 seg_it.move_to_last ();
1698 prev_right = seg_it.data ()->position ();
1700 scale_factor = (seg_list.length () - 2) / 2;
1701 if (scale_factor < 1)
1706 sqsum += word_sync * scale_factor;
1707 total_count += (seg_list.length () - 1) * scale_factor;
1710 while (!blob_it.cycled_list ());
1711 sp_sd = sp_count > 0 ? sqrt (spsum / sp_count) : 0;
1712 word_sync = total_count > 0 ? sqrt (sqsum / total_count) : space_size * 10;
1713 tprintf (
"new_sd=%g:sd/p=%g:new_sp_sd=%g:res=%c:",
1714 word_sync, word_sync / initial_pitch, sp_sd,
1718 start_it = row_start;
1719 blob_it = row_start;
1722 projection, projection_left, projection_right,
1726 word_sync /= occupation;
1727 word_sync = sqrt (word_sync);
1729 #ifndef GRAPHICS_DISABLED 1744 (
"row_sd=%g:sd/p=%g:res=%c:N=%d:res2=%s,init pitch=%g, row_pitch=%g, all_caps=%d\n",
1745 word_sync, word_sync / initial_pitch,
1759 if (pb != NULL && !pb->
IsText())
1764 BLOBNBOX_IT search_it;
1768 int blobcount, repeated_set;
1770 TO_ROW_IT row_it = block->
get_rows();
1771 if (row_it.empty())
return;
1772 for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1773 row = row_it.data();
1775 if (box_it.empty())
continue;
1782 if (box_it.data()->repeated_set() != 0 &&
1783 !box_it.data()->joined_to_prev()) {
1785 repeated_set = box_it.data()->repeated_set();
1787 search_it.forward();
1788 while (!search_it.at_first() &&
1789 search_it.data()->repeated_set() == repeated_set) {
1791 search_it.forward();
1797 if (!box_it.empty() && box_it.data()->joined_to_prev()) {
1798 tprintf(
"Bad box joined to prev at");
1799 box_it.data()->bounding_box().print();
1800 tprintf(
"After repeated word:");
1803 ASSERT_HOST(box_it.empty() || !box_it.data()->joined_to_prev());
1806 word_it.add_after_then_move(word);
1810 }
while (!box_it.at_first());
1821 #ifndef GRAPHICS_DISABLED 1828 TO_ROW_IT row_it = block->
get_rows ();
1830 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1831 row = row_it.data ();
const TBOX & bounding_box() const
float compute_pitch_sd2(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float initial_pitch, inT16 &occupation, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
void plot_fp_word(TO_BLOCK *block, float pitch, float nonspace)
ScrollView * create_to_win(ICOORD page_tr)
EXTERN bool textord_blocksall_prop
EXTERN double textord_projection_scale
BOOL8 find_row_pitch(TO_ROW *row, inT32 maxwidth, inT32 dm_gap, TO_BLOCK *block, inT32 block_index, inT32 row_index, BOOL8 testing_on)
EXTERN bool textord_pitch_cheat
EXTERN bool textord_show_page_cuts
EXTERN double textord_words_min_minspace
EXTERN double textord_pitch_rowsimilarity
void count_block_votes(TO_BLOCK *block, inT32 &def_fixed, inT32 &def_prop, inT32 &maybe_fixed, inT32 &maybe_prop, inT32 &corr_fixed, inT32 &corr_prop, inT32 &dunno)
int num_repeated_sets() const
EXTERN int textord_words_veto_power
BOOL8 fixed_pitch_row(TO_ROW *row, BLOCK *block, inT32 block_index)
#define MAX_ALLOWED_PITCH
EXTERN double textord_words_default_minspace
EXTERN double textord_fpiqr_ratio
EXTERN double textord_max_pitch_iqr
EXTERN int textord_pitch_range
BOOL8 try_doc_fixed(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient)
PITCH_TYPE pitch_decision
EXTERN bool textord_debug_pitch_metric
void plot_row_cells(ScrollView *win, ScrollView::Color colour, TO_ROW *row, float xshift, ICOORDELT_LIST *cells)
void add(inT32 value, inT32 count)
void compute_fixed_pitch(ICOORD page_tr, TO_BLOCK_LIST *port_blocks, float gradient, FCOORD rotation, BOOL8 testing_on)
double check_pitch_sync(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, FPSEGPT_LIST *seg_list)
BOOL8 row_pitch_stats(TO_ROW *row, inT32 maxwidth, BOOL8 testing_on)
EXTERN bool textord_pitch_scalebigwords
EXTERN bool textord_disable_pitch_test
void print_block_counts(TO_BLOCK *block, inT32 block_index)
EXTERN double textord_spacesize_ratioprop
EXTERN double textord_words_pitchsd_threshold
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
EXTERN double words_initial_upper
EXTERN int textord_debug_block
void plot_fp_cells2(ScrollView *win, ScrollView::Color colour, TO_ROW *row, FPSEGPT_LIST *seg_list)
void print_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch)
BLOBNBOX_LIST * blob_list()
PITCH_TYPE pitch_decision
EXTERN double words_default_fixed_space
POLY_BLOCK * poly_block() const
EXTERN double textord_balance_factor
EXTERN bool textord_show_fixed_cuts
BOOL8 compute_rows_pitch(TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
void compute_vertical_projection()
bool joined_to_prev() const
int sort_floats(const void *arg1, const void *arg2)
inT16 x() const
access function
BOOL8 count_pitch_stats(TO_ROW *row, STATS *gap_stats, STATS *pitch_stats, float initial_pitch, float min_space, BOOL8 ignore_outsize, BOOL8 split_outsize, inT32 dm_gap)
TBOX box_next(BLOBNBOX_IT *it)
void smooth(inT32 factor)
void mark_repeated_chars(TO_ROW *row)
float tune_row_pitch(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
bool rep_chars_marked() const
EXTERN double textord_words_default_nonspace
ICOORDELT_LIST char_cells
double check_pitch_sync2(BLOBNBOX_IT *blob_it, inT16 blob_count, inT16 pitch, inT16 pitch_error, STATS *projection, inT16 projection_left, inT16 projection_right, float projection_scale, inT16 &occupation_count, FPSEGPT_LIST *seg_list, inT16 start, inT16 end)
EXTERN bool textord_show_row_cuts
inT32 cluster(float lower, float upper, float multiple, inT32 max_clusters, STATS *clusters)
EXTERN double textord_words_maxspace
EXTERN bool textord_debug_pitch_test
EXTERN double words_default_fixed_limit
EXTERN bool textord_all_prop
inT32 pile_count(inT32 value) const
float tune_row_pitch2(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float &initial_pitch, float &best_sp_sd, inT16 &best_mid_cuts, ICOORDELT_LIST *best_cells, BOOL8 testing_on)
EXTERN double textord_words_default_maxspace
void compute_block_pitch(TO_BLOCK *block, FCOORD rotation, inT32 block_index, BOOL8 testing_on)
void set_x(inT16 xin)
rewrite function
#define BLOCK_STATS_CLUSTERS
double ile(double frac) const
EXTERN double words_initial_lower
void fix_row_pitch(TO_ROW *bad_row, TO_BLOCK *bad_block, TO_BLOCK_LIST *blocks, inT32 row_target, inT32 block_target)
BOOL8 try_block_fixed(TO_BLOCK *block, inT32 block_index)
EXTERN bool textord_blockndoc_fixed
EXTERN int textord_dotmatrix_gap
void find_repeated_chars(TO_BLOCK *block, BOOL8 testing_on)
#define double_VAR(name, val, comment)
EXTERN ScrollView * to_win
BOOL8 try_rows_fixed(TO_BLOCK *block, inT32 block_index, BOOL8 testing_on)
void set_flag(WERD_FLAGS mask, BOOL8 value)
EXTERN double textord_words_def_prop
#define BOOL_VAR(name, val, comment)
EXTERN double textord_wordstats_smooth_factor
EXTERN double words_default_prop_nonspace
WERD * make_real_word(BLOBNBOX_IT *box_it, inT32 blobcount, BOOL8 bol, uinT8 blanks)
TBOX bounding_box() const
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
float compute_pitch_sd(TO_ROW *row, STATS *projection, inT16 projection_left, inT16 projection_right, float space_size, float initial_pitch, float &sp_sd, inT16 &mid_cuts, ICOORDELT_LIST *row_cells, BOOL8 testing_on, inT16 start, inT16 end)
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
bool set_range(inT32 min_bucket_value, inT32 max_bucket_value_plus_1)
EXTERN double textord_words_def_fixed
EXTERN bool textord_blocksall_fixed
EXTERN bool textord_fast_pitch_test
EXTERN bool textord_show_initial_words