39 #define MAXSPACING 128 53 inT16 block_space_gap_width;
55 inT16 block_non_space_gap_width;
56 BOOL8 old_text_ord_proportional;
59 block_it.set_to_list (blocks);
61 for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
62 block_it.forward ()) {
63 block = block_it.data ();
64 gapmap =
new GAPMAP (block);
65 block_spacing_stats(block,
67 old_text_ord_proportional,
68 block_space_gap_width,
69 block_non_space_gap_width);
77 (
float) block_space_gap_width / block_non_space_gap_width < 3.0) {
78 block_non_space_gap_width = (
inT16) floor (block_space_gap_width / 3.0);
80 row_it.set_to_list (block->
get_rows ());
82 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
87 tprintf (
"Block %d Row %d: Now Proportional\n",
88 block_index, row_index);
89 row_spacing_stats(row,
93 block_space_gap_width,
94 block_non_space_gap_width);
99 (
"Block %d Row %d: Now Fixed Pitch Decision:%d fp flag:%f\n",
103 #ifndef GRAPHICS_DISABLED 119 void Textord::block_spacing_stats(
122 BOOL8 &old_text_ord_proportional,
123 inT16 &block_space_gap_width,
124 inT16 &block_non_space_gap_width
137 inT16 centre_to_centre;
139 float real_space_threshold;
140 float iqr_centre_to_centre;
141 float iqr_all_gap_stats;
145 row_it.set_to_list (block->
get_rows ());
146 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
147 row = row_it.data ();
153 blob_it.mark_cycle_pt ();
154 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
158 blob_box = reduced_box_next (row, &blob_it);
161 row_length = end_of_row - blob_box.
left ();
162 if (blob_box.
width () < minwidth)
163 minwidth = blob_box.
width ();
164 prev_blob_box = blob_box;
165 while (!blob_it.cycled_list ()) {
169 blob_box = reduced_box_next (row, &blob_it);
172 if (blob_box.
width () < minwidth)
173 minwidth = blob_box.
width ();
174 gap_width = blob_box.
left () - prev_blob_box.
right ();
175 if (!ignore_big_gap (row, row_length, gapmap,
176 prev_blob_box.
right (), blob_box.
left ())) {
177 all_gap_stats.add (gap_width, 1);
179 centre_to_centre = (blob_box.
left () + blob_box.
right () -
180 (prev_blob_box.
left () +
181 prev_blob_box.
right ())) / 2;
183 centre_to_centre_stats.add (centre_to_centre, 1);
186 prev_blob_box = blob_box;
192 if (all_gap_stats.get_total () <= 1) {
193 block_non_space_gap_width = minwidth;
194 block_space_gap_width = -1;
196 old_text_ord_proportional =
TRUE;
200 iqr_centre_to_centre = centre_to_centre_stats.ile (0.75) -
201 centre_to_centre_stats.ile (0.25);
202 iqr_all_gap_stats = all_gap_stats.ile (0.75) - all_gap_stats.ile (0.25);
203 old_text_ord_proportional =
204 iqr_centre_to_centre * 2 > iqr_all_gap_stats;
216 block_non_space_gap_width = (
inT16) floor (all_gap_stats.median ());
219 row_it.set_to_list (block->
get_rows ());
220 for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
221 row = row_it.data ();
226 real_space_threshold =
230 blob_it.mark_cycle_pt ();
232 blob_it.data_relative (-1)->bounding_box ().right ();
236 blob_box = reduced_box_next (row, &blob_it);
239 row_length = blob_box.
left () - end_of_row;
240 prev_blob_box = blob_box;
241 while (!blob_it.cycled_list ()) {
245 blob_box = reduced_box_next (row, &blob_it);
248 gap_width = blob_box.
left () - prev_blob_box.
right ();
249 if ((gap_width > real_space_threshold) &&
250 !ignore_big_gap (row, row_length, gapmap,
251 prev_blob_box.
right (),
266 || (!narrow_blob (row, prev_blob_box)
267 && !narrow_blob (row, blob_box))))
268 || (wide_blob (row, prev_blob_box)
269 && wide_blob (row, blob_box)))
270 space_gap_stats.add (gap_width, 1);
272 prev_blob_box = blob_box;
277 if (space_gap_stats.get_total () <= 2)
278 block_space_gap_width = -1;
280 block_space_gap_width =
281 MAX ((
inT16) floor (space_gap_stats.median ()),
282 3 * block_non_space_gap_width);
292 void Textord::row_spacing_stats(
297 inT16 block_space_gap_width,
298 inT16 block_non_space_gap_width
309 inT16 real_space_threshold = 0;
312 inT16 large_gap_count = 0;
313 BOOL8 suspected_table;
314 inT32 max_max_nonspace;
315 BOOL8 good_block_space_estimate = block_space_gap_width > 0;
317 inT32 row_length = 0;
319 inT32 sane_threshold;
323 if (!good_block_space_estimate)
324 block_space_gap_width =
inT16 (floor (row->
xheight / 2));
327 real_space_threshold =
328 block_non_space_gap_width +
331 block_non_space_gap_width)));
333 real_space_threshold =
334 (block_space_gap_width + block_non_space_gap_width) / 2;
336 blob_it.mark_cycle_pt ();
337 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
341 blob_box = reduced_box_next (row, &blob_it);
344 row_length = end_of_row - blob_box.
left ();
345 prev_blob_box = blob_box;
346 while (!blob_it.cycled_list ()) {
350 blob_box = reduced_box_next (row, &blob_it);
353 gap_width = blob_box.
left () - prev_blob_box.
right ();
354 if (ignore_big_gap (row, row_length, gapmap,
355 prev_blob_box.
right (), blob_box.
left ()))
358 if (gap_width >= real_space_threshold) {
363 || (!narrow_blob (row, prev_blob_box)
364 && !narrow_blob (row, blob_box))))
365 || (wide_blob (row, prev_blob_box)
366 && wide_blob (row, blob_box)))
367 cert_space_gap_stats.add (gap_width, 1);
368 all_space_gap_stats.add (gap_width, 1);
371 small_gap_stats.add (gap_width, 1);
372 all_gap_stats.add (gap_width, 1);
374 prev_blob_box = blob_box;
377 suspected_table = (large_gap_count > 1) ||
378 ((large_gap_count > 0) &&
383 if ((cert_space_gap_stats.get_total () >=
387 cert_space_gap_stats.get_total () > 0)) {
390 &cert_space_gap_stats,
392 block_space_gap_width,
393 block_non_space_gap_width);
396 !isolated_row_stats (row, gapmap, &all_gap_stats, suspected_table,
397 block_idx, row_idx)) {
399 tprintf (
"B:%d R:%d -- Inadequate certain spaces.\n",
405 row->
kern_size = all_gap_stats.median ();
407 row->
kern_size = block_non_space_gap_width;
415 &all_space_gap_stats,
417 block_space_gap_width,
418 block_non_space_gap_width);
423 improve_row_threshold(row, &all_gap_stats);
428 if (suspected_table &&
431 tprintf(
"B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f.\n", block_idx,
444 if (good_block_space_estimate &&
446 sane_space = block_space_gap_width;
452 tprintf(
"B:%d R:%d -- DON'T BELIEVE SPACE %3.2f %d %3.2f -> %3.2f.\n",
465 tprintf(
"B:%d R:%d -- DON'T BELIEVE THRESH %3.2f %d %3.2f->%d.\n",
473 if (suspected_table) {
476 sane_threshold =
inT32 (floor ((sane_space + row->
kern_size) / 2));
481 tprintf (
"B:%d R:%d -- SUSPECT NO SPACES %3.2f %d %3.2f.\n",
526 for (index = 0; index <= max_max_nonspace; index++) {
527 if (all_gap_stats.pile_count (index) > max)
528 max = all_gap_stats.pile_count (index);
530 (all_gap_stats.pile_count (index) < 0.1 * max)) {
574 (
"B:%d R:%d L:%d-- Kn:%d Sp:%d Thr:%d -- Kn:%3.2f (%d) Thr:%d (%d) Sp:%3.2f\n",
575 block_idx, row_idx, row_length, block_non_space_gap_width,
576 block_space_gap_width, real_space_threshold, row->
kern_size,
580 tprintf(
"row->kern_size = %3.2f, row->space_size = %3.2f, " 581 "row->space_threshold = %d\n",
585 void Textord::old_to_method(
587 STATS *all_gap_stats,
588 STATS *space_gap_stats,
589 STATS *small_gap_stats,
590 inT16 block_space_gap_width,
591 inT16 block_non_space_gap_width
599 if (row->
space_size > block_space_gap_width * 1.5) {
601 row->
space_size = block_space_gap_width * 1.5;
606 if (row->
space_size < (block_non_space_gap_width * 2) + 1)
607 row->
space_size = (block_non_space_gap_width * 2) + 1;
610 else if (space_gap_stats->
get_total () >= 1) {
613 if (row->
space_size > block_space_gap_width * 1.5) {
615 row->
space_size = block_space_gap_width * 1.5;
620 if (row->
space_size < (block_non_space_gap_width * 3) + 1)
621 row->
space_size = (block_non_space_gap_width * 3) + 1;
635 row->
kern_size = block_non_space_gap_width;
678 STATS *all_gap_stats,
679 BOOL8 suspected_table,
683 float crude_threshold_estimate;
684 inT16 small_gaps_count;
697 kern_estimate = all_gap_stats->
median ();
700 small_gaps_count = stats_count_under (all_gap_stats,
702 ceil (crude_threshold_estimate));
707 (total - small_gaps_count < 1)) {
709 tprintf(
"B:%d R:%d -- Can't do isolated row stats.\n", block_idx,
714 blob_it.mark_cycle_pt ();
715 end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
719 blob_box = reduced_box_next (row, &blob_it);
722 row_length = end_of_row - blob_box.
left ();
723 prev_blob_box = blob_box;
724 while (!blob_it.cycled_list ()) {
728 blob_box = reduced_box_next (row, &blob_it);
731 gap_width = blob_box.
left () - prev_blob_box.
right ();
732 if (!ignore_big_gap (row, row_length, gapmap,
733 prev_blob_box.
right (), blob_box.
left ()) &&
734 (gap_width > crude_threshold_estimate)) {
738 (!narrow_blob (row, prev_blob_box) &&
739 !narrow_blob (row, blob_box)))) ||
740 (wide_blob (row, prev_blob_box) && wide_blob (row, blob_box)))
741 cert_space_gap_stats.add (gap_width, 1);
742 all_space_gap_stats.add (gap_width, 1);
744 if (gap_width < crude_threshold_estimate)
745 small_gap_stats.
add (gap_width, 1);
747 prev_blob_box = blob_box;
749 if (cert_space_gap_stats.get_total () >=
752 row->
space_size = cert_space_gap_stats.median ();
753 else if (suspected_table && (cert_space_gap_stats.get_total () > 0))
755 row->
space_size = cert_space_gap_stats.mean ();
757 else if (all_space_gap_stats.get_total () >=
760 row->
space_size = all_space_gap_stats.median ();
762 row->
space_size = all_space_gap_stats.mean ();
775 tprintf (
"B:%d R:%d -- Isolated row stats SANITY FAILURE: %f %d %f\n",
785 tprintf (
"B:%d R:%d -- Isolated row stats: %f %d %f\n",
795 for (index = 0; index < threshold; index++)
816 void Textord::improve_row_threshold(
TO_ROW *row,
STATS *all_gap_stats) {
819 inT16 reqd_zero_width = 0;
820 inT16 zero_width = 0;
821 inT16 zero_start = 0;
825 tprintf (
"Improve row threshold 0");
826 if ((all_gap_stats->
get_total () <= 25) ||
829 (stats_count_under (all_gap_stats,
830 (
inT16) ceil (kn + (sp - kn) / 3 + 0.5)) <
840 reqd_zero_width = (
inT16) floor ((sp - kn) / 3 + 0.5);
841 if (reqd_zero_width < 3)
844 for (index =
inT16 (ceil (kn)); index <
inT16 (floor (sp)); index++) {
851 if (zero_width >= reqd_zero_width)
860 tprintf (
" reqd_z_width: %d found %d 0's, starting %d; thresh: %d/n",
862 if ((zero_width < reqd_zero_width) ||
871 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
878 (
"Improve row kn:%5.2f sp:%5.2f 0's: %d -> %d thresh:%d -> %d\n",
898 BOOL8 prev_fuzzy_non;
900 BOOL8 fuzzy_sp =
false;
901 BOOL8 fuzzy_non =
false;
906 C_OUTLINE_IT cout_it;
908 C_BLOB_IT cblob_it = &cblobs;
914 float repetition_spacing;
928 inT16 word_count = 0;
930 rep_char_it.set_to_list (&(row->
rep_words));
931 if (!rep_char_it.empty ()) {
932 next_rep_char_word_right =
933 rep_char_it.data ()->bounding_box ().right ();
937 cblob_it.set_to_list (&cblobs);
939 word_it.set_to_list (&words);
942 prev_fuzzy_sp =
FALSE;
943 prev_fuzzy_non =
FALSE;
944 if (!box_it.empty ()) {
945 xstarts[0] = box_it.data ()->bounding_box ().left ();
946 if (xstarts[0] > next_rep_char_word_right) {
948 word = rep_char_it.extract ();
949 word_it.add_after_then_move (word);
959 repetition_spacing = find_mean_blob_spacing (word);
960 current_gap = box_it.data ()->bounding_box ().left () -
961 next_rep_char_word_right;
962 current_within_xht_gap = current_gap;
971 tprintf (
"Repch wd at BOL(%d, %d). rep spacing %5.2f; Rgap:%d ",
972 box_it.data ()->bounding_box ().left (),
973 box_it.data ()->bounding_box ().bottom (),
974 repetition_spacing, current_gap);
975 prev_fuzzy_sp =
FALSE;
976 prev_fuzzy_non =
FALSE;
977 if (rep_char_it.empty ()) {
981 rep_char_it.forward ();
982 next_rep_char_word_right =
983 rep_char_it.data ()->bounding_box ().right ();
987 peek_at_next_gap(row,
991 next_within_xht_gap);
993 bblob = box_it.data ();
996 if (bblob->
cblob () != NULL) {
997 cout_it.set_to_list (cblob_it.data ()->out_list ());
998 cout_it.move_to_last ();
1000 delete bblob->
cblob ();
1003 if (bblob->
cblob() != NULL)
1004 cblob_it.add_after_then_move (bblob->
cblob ());
1005 prev_x = blob_box.
right ();
1008 bblob = box_it.data ();
1013 prev_gap = current_gap;
1014 prev_within_xht_gap = current_within_xht_gap;
1015 prev_blob_box = next_blob_box;
1016 current_gap = next_gap;
1017 current_within_xht_gap = next_within_xht_gap;
1018 peek_at_next_gap(row,
1022 next_within_xht_gap);
1024 inT16 prev_gap_arg = prev_gap;
1025 inT16 next_gap_arg = next_gap;
1027 prev_gap_arg = prev_within_xht_gap;
1028 next_gap_arg = next_within_xht_gap;
1031 if (blob_box.
left () > next_rep_char_word_right ||
1032 make_a_word_break(row, blob_box, prev_gap_arg, prev_blob_box,
1033 current_gap, current_within_xht_gap,
1034 next_blob_box, next_gap_arg,
1035 blanks, fuzzy_sp, fuzzy_non,
1036 prev_gap_was_a_space,
1037 break_at_next_gap) ||
1038 box_it.at_first()) {
1040 word =
new WERD (&cblobs, prev_blanks, NULL);
1042 word_it.add_after_then_move (word);
1050 else if (prev_fuzzy_non)
1054 if (blob_box.
left () > next_rep_char_word_right) {
1056 word = rep_char_it.extract ();
1057 word_it.add_after_then_move (word);
1060 repetition_spacing = find_mean_blob_spacing (word);
1062 current_within_xht_gap = current_gap;
1073 (
"Repch wd (%d,%d) rep gap %5.2f; Lgap:%d (%d blanks);",
1076 repetition_spacing, current_gap, blanks);
1084 blob_box.
left () - next_rep_char_word_right;
1093 tprintf (
" Rgap:%d (%d blanks)\n",
1094 current_gap, blanks);
1098 if (rep_char_it.empty ()) {
1102 rep_char_it.forward ();
1103 next_rep_char_word_right =
1104 rep_char_it.data ()->bounding_box ().right ();
1108 if (box_it.at_first () && rep_char_it.empty ()) {
1111 xstarts[1] = prev_x;
1114 prev_blanks = blanks;
1115 prev_fuzzy_sp = fuzzy_sp;
1116 prev_fuzzy_non = fuzzy_non;
1121 while (!box_it.at_first ());
1124 while (!rep_char_it.empty ()) {
1125 word = rep_char_it.extract ();
1126 word_it.add_after_then_move (word);
1129 repetition_spacing = find_mean_blob_spacing (word);
1140 "Repch wd at EOL (%d,%d). rep spacing %5.2f; Lgap:%d (%d blanks)\n",
1142 repetition_spacing, current_gap, blanks);
1148 if (rep_char_it.empty ()) {
1151 xstarts[1] = prev_x;
1154 rep_char_it.forward ();
1157 real_row =
new ROW (row,
1159 word_it.set_to_list (real_row->
word_list ());
1161 word_it.add_list_after (&words);
1165 tprintf (
"Row: Made %d words in row ((%d,%d)(%d,%d))\n",
1189 C_OUTLINE_IT cout_it;
1191 C_BLOB_IT cblob_it = &cblobs;
1198 inT16 word_count = 0;
1200 cblob_it.set_to_list(&cblobs);
1202 word_it.set_to_list(&words);
1204 if (!box_it.empty()) {
1207 bblob = box_it.data();
1210 if (bblob->
cblob() != NULL) {
1211 cout_it.set_to_list(cblob_it.data()->out_list());
1212 cout_it.move_to_last();
1214 delete bblob->
cblob();
1217 if (bblob->
cblob() != NULL)
1218 cblob_it.add_after_then_move(bblob->
cblob());
1221 bblob = box_it.data();
1225 word =
new WERD(&cblobs, 1, NULL);
1227 word_it.add_after_then_move(word);
1232 if (box_it.at_first()) {
1237 while (!box_it.at_first());
1240 word_it.set_to_list(real_row->
word_list());
1242 word_it.add_list_after(&words);
1245 tprintf (
"Row:Made %d words in row ((%d,%d)(%d,%d))\n",
1257 BOOL8 Textord::make_a_word_break(
1262 inT16 real_current_gap,
1263 inT16 within_xht_current_gap,
1269 BOOL8& prev_gap_was_a_space,
1270 BOOL8& break_at_next_gap) {
1273 float fuzzy_sp_to_kn_limit;
1275 if (break_at_next_gap) {
1276 break_at_next_gap =
FALSE;
1287 (real_current_gap < tosp_dont_fool_with_small_kerns * row->kern_size)))
1289 within_xht_current_gap = real_current_gap;
1292 current_gap = within_xht_current_gap;
1294 current_gap = real_current_gap;
1299 if (space && (current_gap <
MAX_INT16)) {
1300 if (current_gap < row->min_space) {
1325 prev_gap_was_a_space =
TRUE;
1335 int num_blanks = current_gap;
1347 (real_current_gap <= row->max_nonspace) &&
1351 #ifndef GRAPHICS_DISABLED 1352 mark_gap (blob_box, 20,
1353 prev_gap, prev_blob_box.
width (),
1354 current_gap, next_blob_box.
width (), next_gap);
1358 (real_current_gap <= row->space_threshold) &&
1365 #ifndef GRAPHICS_DISABLED 1366 mark_gap (blob_box, 21,
1367 prev_gap, prev_blob_box.
width (),
1368 current_gap, next_blob_box.
width (), next_gap);
1372 (real_current_gap < row->min_space) &&
1373 (within_xht_current_gap >= row->
min_space)) {
1375 #ifndef GRAPHICS_DISABLED 1376 mark_gap (blob_box, 22,
1377 prev_gap, prev_blob_box.
width (),
1378 current_gap, next_blob_box.
width (), next_gap);
1382 !suspected_punct_blob(row, prev_blob_box) &&
1383 suspected_punct_blob(row, blob_box)) {
1384 break_at_next_gap =
TRUE;
1387 else if ((current_gap < row->min_space) &&
1395 fuzzy_sp_to_kn_limit = 99999.0f;
1399 if ((prev_blob_box.
width () > 0) &&
1400 narrow_blob (row, prev_blob_box) &&
1401 prev_gap_was_a_space &&
1404 (current_gap > fuzzy_sp_to_kn_limit)) {
1412 #ifndef GRAPHICS_DISABLED 1413 mark_gap (blob_box, 1,
1414 prev_gap, prev_blob_box.
width (),
1415 current_gap, next_blob_box.
width (), next_gap);
1420 else if ((prev_blob_box.
width () > 0) &&
1421 narrow_blob (row, prev_blob_box) &&
1422 !prev_gap_was_a_space &&
1425 (current_gap > fuzzy_sp_to_kn_limit)) {
1433 #ifndef GRAPHICS_DISABLED 1434 mark_gap (blob_box, 2,
1435 prev_gap, prev_blob_box.
width (),
1436 current_gap, next_blob_box.
width (), next_gap);
1439 else if ((next_blob_box.
width () > 0) &&
1440 narrow_blob (row, next_blob_box) &&
1444 (current_gap > fuzzy_sp_to_kn_limit)) {
1452 #ifndef GRAPHICS_DISABLED 1453 mark_gap (blob_box, 3,
1454 prev_gap, prev_blob_box.
width (),
1455 current_gap, next_blob_box.
width (), next_gap);
1458 else if ((next_blob_box.
width () > 0) &&
1459 narrow_blob (row, next_blob_box) &&
1463 (current_gap > fuzzy_sp_to_kn_limit)) {
1471 #ifndef GRAPHICS_DISABLED 1472 mark_gap (blob_box, 4,
1473 prev_gap, prev_blob_box.
width (),
1474 current_gap, next_blob_box.
width (), next_gap);
1477 else if ((((next_blob_box.
width () > 0) &&
1478 narrow_blob (row, next_blob_box)) ||
1479 ((prev_blob_box.
width () > 0) &&
1480 narrow_blob (row, prev_blob_box)))) {
1482 #ifndef GRAPHICS_DISABLED 1483 mark_gap (blob_box, 6,
1484 prev_gap, prev_blob_box.
width (),
1485 current_gap, next_blob_box.
width (), next_gap);
1499 if ((prev_blob_box.
width () > 0) &&
1500 (next_blob_box.
width () > 0) &&
1503 wide_blob (row, prev_blob_box) &&
1504 wide_blob (row, next_blob_box)) {
1518 #ifndef GRAPHICS_DISABLED 1519 mark_gap (blob_box, 7,
1520 prev_gap, prev_blob_box.
width (),
1521 current_gap, next_blob_box.
width (), next_gap);
1523 }
else if (prev_blob_box.
width() > 0 &&
1524 next_blob_box.
width() > 0 &&
1528 !(narrow_blob(row, prev_blob_box) ||
1529 suspected_punct_blob(row, prev_blob_box)) &&
1530 !(narrow_blob(row, next_blob_box) ||
1531 suspected_punct_blob(row, next_blob_box))) {
1534 #ifndef GRAPHICS_DISABLED 1535 mark_gap (blob_box, 8,
1536 prev_gap, prev_blob_box.
width (),
1537 current_gap, next_blob_box.
width (), next_gap);
1541 (prev_blob_box.
width () > 0) &&
1542 (next_blob_box.
width () > 0) &&
1545 (!suspected_punct_blob (row, prev_blob_box) &&
1546 !suspected_punct_blob (row, next_blob_box)))) {
1549 #ifndef GRAPHICS_DISABLED 1550 mark_gap (blob_box, 9,
1551 prev_gap, prev_blob_box.
width (),
1552 current_gap, next_blob_box.
width (), next_gap);
1557 tprintf(
"word break = %d current_gap = %d, prev_gap = %d, " 1558 "next_gap = %d\n", space ? 1 : 0, current_gap,
1559 prev_gap, next_gap);
1560 prev_gap_was_a_space = space && !(fuzzy_non);
1568 (((
float) blob_box.
width () / blob_box.
height ()) <=
1578 (((
float) blob_box.
width () / blob_box.
height ()) >
1584 result = !narrow_blob (row, blob_box);
1591 float blob_x_centre;
1593 blob_x_centre = (box.
right () + box.
left ()) / 2.0;
1603 void Textord::peek_at_next_gap(
TO_ROW *row,
1605 TBOX &next_blob_box,
1607 inT16 &next_within_xht_gap) {
1608 TBOX next_reduced_blob_box;
1610 BLOBNBOX_IT reduced_box_it = box_it;
1612 next_blob_box =
box_next (&box_it);
1613 next_reduced_blob_box = reduced_box_next (row, &reduced_box_it);
1614 if (box_it.at_first ()) {
1619 bit_beyond = box_it.data ()->bounding_box ();
1620 next_gap = bit_beyond.
left () - next_blob_box.
right ();
1621 bit_beyond = reduced_box_next (row, &reduced_box_it);
1622 next_within_xht_gap =
1623 bit_beyond.
left () - next_reduced_blob_box.
right ();
1628 #ifndef GRAPHICS_DISABLED 1629 void Textord::mark_gap(
1633 inT16 prev_blob_width,
1635 inT16 next_blob_width,
1691 blob.
left () - current_gap / 2.0f,
1696 tprintf(
" (%d,%d) Sp<->Kn Rule %d %d %d %d %d %d\n",
1697 blob.
left() - current_gap / 2, blob.
bottom(), rule, prev_gap,
1698 prev_blob_width, current_gap, next_blob_width, next_gap);
1702 float Textord::find_mean_blob_spacing(
WERD *word) {
1706 inT16 gap_count = 0;
1710 if (!cblob_it.empty ()) {
1711 cblob_it.mark_cycle_pt ();
1712 prev_right = cblob_it.data ()->bounding_box ().
right ();
1714 cblob_it.forward ();
1715 for (; !cblob_it.cycled_list (); cblob_it.forward ()) {
1716 blob_box = cblob_it.data ()->bounding_box ();
1717 gap_sum += blob_box.
left () - prev_right;
1719 prev_right = blob_box.
right ();
1723 return (gap_sum / (
float) gap_count);
1734 inT16 gap = right - left + 1;
1742 if ((gap > 2.1 * row->
xheight) && (row_length > 20 * row->
xheight))
1744 if ((gap > 1.75 * row->
xheight) &&
1745 ((row_length > 35 * row->
xheight) ||
1766 TBOX Textord::reduced_box_next(
1774 inT16 left_above_xht;
1775 inT16 new_left_above_xht;
1789 reduced_box = reduced_box_for_blob (blob, row, &left_above_xht);
1793 if (blob->
cblob() == NULL)
1798 reduced_box_for_blob(blob, row, &new_left_above_xht);
1799 left_above_xht =
MIN (left_above_xht, new_left_above_xht);
1805 if ((reduced_box.
width () > 0) &&
1807 < left_above_xht) && (reduced_box.
height () > 0.7 * row->
xheight)) {
1808 #ifndef GRAPHICS_DISABLED 1814 reduced_box = full_box;
1840 TBOX Textord::reduced_box_for_blob(
1843 inT16 *left_above_xht) {
1845 float blob_x_centre;
1854 blob_x_centre = (blob_box.
left () + blob_box.
right ()) / 2.0;
1864 static_cast<float>(
MAX_INT16), left_limit, junk);
1865 if (left_limit > junk)
1868 *left_above_xht = (
inT16) floor (left_limit);
1878 if (left_limit > junk)
1887 if (junk > right_limit)
ROW * make_prop_words(TO_ROW *row, FCOORD rotation)
const TBOX & bounding_box() const
double tosp_wide_fraction
void set_reduced_box(TBOX new_box)
bool tosp_only_small_gaps_for_kern
bool tosp_force_wordbreak_on_punct
double tosp_init_guess_kn_mult
bool tosp_fuzzy_limit_all
bool tosp_flip_fuzz_sp_to_kn
TBOX bounding_box() const
int IntCastRounded(double x)
double tosp_narrow_fraction
bool tosp_use_pre_chopping
void set_blanks(uinT8 new_blanks)
double tosp_narrow_aspect_ratio
bool tosp_all_flips_fuzzy
ROW * make_blob_words(TO_ROW *row, FCOORD rotation)
PITCH_TYPE pitch_decision
void add(inT32 value, inT32 count)
double tosp_table_xht_sp_ratio
BOOL8 table_gap(inT16 left, inT16 right)
double tosp_large_kerning
double tosp_kern_gap_factor2
double tosp_silly_kn_sp_gap
T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound)
C_OUTLINE_LIST * out_list()
double tosp_ignore_big_gaps
bool tosp_only_use_prop_rows
void plot_word_decisions(ScrollView *win, inT16 pitch, TO_ROW *row)
bool tosp_stats_use_xht_gaps
void recalc_bounding_box()
void Ellipse(int x, int y, int width, int height)
double tosp_init_guess_xht_mult
int tosp_enough_space_samples_for_median
double tosp_threshold_bias1
BLOBNBOX_LIST * blob_list()
double tosp_threshold_bias2
bool tosp_old_to_constrain_sp_kn
double tosp_fuzzy_space_factor1
bool joined_to_prev() const
bool tosp_flip_fuzz_kn_to_sp
double tosp_kern_gap_factor3
double tosp_ignore_very_big_gaps
double tosp_pass_wide_fuzz_sp_to_context
void plot(ScrollView *fd) const
TBOX box_next(BLOBNBOX_IT *it)
EXTERN double gapmap_big_gaps
double tosp_table_fuzzy_kn_sp_ratio
double tosp_kern_gap_factor1
double tosp_max_sane_kn_thresh
bool tosp_block_use_cert_spaces
double tosp_fuzzy_sp_fraction
double tosp_wide_aspect_ratio
bool tosp_narrow_blobs_not_cert
double tosp_old_sp_kn_th_factor
bool tosp_rule_9_test_punct
double tosp_enough_small_gaps
double tosp_fuzzy_kn_fraction
double tosp_fuzzy_space_factor
inT32 pile_count(inT32 value) const
void to_spacing(ICOORD page_tr, TO_BLOCK_LIST *blocks)
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
bool tosp_row_use_cert_spaces1
const TBOX & reduced_box() const
EXTERN ScrollView * to_win
void set_flag(WERD_FLAGS mask, BOOL8 value)
C_BLOB_LIST * cblob_list()
double tosp_min_sane_kn_sp
bool tosp_row_use_cert_spaces
bool tosp_only_use_xht_gaps
bool tosp_recovery_isolated_row_stats
void find_cblob_hlimits(C_BLOB *blob, float bottomy, float topy, float &xmin, float &xmax)
TBOX bounding_box() const
double tosp_table_kn_sp_ratio
double tosp_dont_fool_with_small_kerns
double tosp_fuzzy_space_factor2
EXTERN bool textord_show_initial_words