22 #pragma warning(disable:4244) // Conversion warnings 87 blob_type_(blob_type), flow_(
BTFT_NONE), good_blob_score_(0),
88 good_width_(false), good_column_(false),
89 left_key_tab_(false), right_key_tab_(false),
90 left_key_(0), right_key_(0), type_(
PT_UNKNOWN), vertical_(vertical),
91 working_set_(NULL), last_add_was_vertical_(false), block_owned_(false),
92 desperately_merged_(false),
93 first_column_(-1), last_column_(-1), column_set_(NULL),
94 side_step_(0), top_spacing_(0), bottom_spacing_(0),
95 type_before_table_(
PT_UNKNOWN), inside_table_column_(false),
96 nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL),
97 space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
99 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
129 ColPartition_LIST* big_part_list) {
138 if (big_part_list != NULL) {
139 ColPartition_IT part_it(big_part_list);
140 part_it.add_to_end(single);
148 ColPartition_C_IT it(&upper_partners_);
149 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
150 it.data()->RemovePartner(
false,
this);
152 it.set_to_list(&lower_partners_);
153 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
154 it.data()->RemovePartner(
true,
this);
162 int left,
int bottom,
163 int right,
int top) {
165 part->bounding_box_ =
TBOX(left, bottom, right, top);
166 part->median_bottom_ = bottom;
167 part->median_top_ = top;
168 part->median_size_ = top - bottom;
169 part->median_width_ = right - left;
182 if (boxes_.length() == 0) {
185 bounding_box_ += box;
189 if (!last_add_was_vertical_) {
190 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
191 last_add_was_vertical_ =
true;
193 boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>,
true, bbox);
195 if (last_add_was_vertical_) {
196 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
197 last_add_was_vertical_ =
false;
199 boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>,
true, bbox);
206 tprintf(
"Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
208 bounding_box_.
left(), bounding_box_.
right());
213 BLOBNBOX_C_IT bb_it(&boxes_);
214 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
215 if (box == bb_it.data()) {
227 BLOBNBOX_C_IT bb_it(&boxes_);
228 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
231 if (biggest == NULL ||
235 if (biggest == NULL ||
246 BLOBNBOX_C_IT bb_it(&boxes_);
247 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
248 if (box != bb_it.data()) {
249 result += bb_it.data()->bounding_box();
258 BLOBNBOX_C_IT bb_it(&boxes_);
259 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
274 BLOBNBOX_C_IT bb_it(&boxes_);
275 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
287 BLOBNBOX_C_IT bb_it(&boxes_);
288 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
290 if (bblob->
owner() ==
this)
300 BLOBNBOX_C_IT bb_it(&boxes_);
301 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
308 if (bb_it.empty())
return false;
319 for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
321 delete bblob->
cblob();
331 BLOBNBOX_CLIST reversed_boxes;
332 BLOBNBOX_C_IT reversed_it(&reversed_boxes);
334 BLOBNBOX_C_IT bb_it(&boxes_);
335 for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
336 reversed_it.add_before_then_move(bb_it.extract());
338 bb_it.add_list_after(&reversed_boxes);
340 int tmp = left_margin_;
341 left_margin_ = -right_margin_;
342 right_margin_ = -tmp;
353 if (bounding_box_.
left() > bounding_box_.
right()) {
355 tprintf(
"Bounding box invalid\n");
360 if (left_margin_ > bounding_box_.
left() ||
361 right_margin_ < bounding_box_.
right()) {
370 tprintf(
"Key inside box: %d v %d or %d v %d\n",
381 int y = (
MidY() + other.
MidY()) / 2;
424 if (bounding_box_.
right() < other.bounding_box_.
left() &&
427 if (other.bounding_box_.
right() < bounding_box_.
left() &&
430 if (bounding_box_.
left() > other.bounding_box_.
right() &&
433 if (other.bounding_box_.
left() > bounding_box_.
right() &&
441 double fractional_tolerance,
442 double constant_tolerance)
const {
444 int nonmatch_count = 0;
445 BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
446 BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
447 box_it.mark_cycle_pt();
448 other_it.mark_cycle_pt();
449 while (!box_it.cycled_list() && !other_it.cycled_list()) {
450 if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
451 fractional_tolerance,
459 return match_count > nonmatch_count;
470 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
473 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
477 tprintf(
"Blob is not a diacritic:");
489 bool result = min_top > candidate.median_bottom_ &&
490 max_bottom < candidate.median_top_;
495 tprintf(
"y ranges don\'t overlap: %d-%d / %d-%d\n",
496 max_bottom, min_top, median_bottom_, median_top_);
505 if (tab_vector != NULL) {
509 left_key_tab_ =
false;
517 if (tab_vector != NULL) {
518 right_key_ = tab_vector->
sort_key();
521 right_key_tab_ =
false;
530 left_key_tab_ = take_box ? false : src.left_key_tab_;
532 left_key_ = src.left_key_;
537 if (left_margin_ > bounding_box_.
left())
538 left_margin_ = src.left_margin_;
543 right_key_tab_ = take_box ? false : src.right_key_tab_;
544 if (right_key_tab_) {
545 right_key_ = src.right_key_;
550 if (right_margin_ < bounding_box_.
right())
551 right_margin_ = src.right_margin_;
556 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
557 return it.data()->left_rule();
561 BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
563 return it.data()->right_rule();
568 return special_blobs_densities_[
type];
573 BLOBNBOX_C_IT blob_it(&boxes_);
575 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
589 special_blobs_densities_[
type] = density;
593 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
594 if (boxes_.empty()) {
598 BLOBNBOX_C_IT blob_it(&boxes_);
599 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
602 special_blobs_densities_[
type]++;
606 special_blobs_densities_[
type] /= boxes_.length();
615 partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
617 upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
619 partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
621 lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
true, partner);
629 ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
630 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631 if (it.data() == partner) {
640 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
641 if (!partners->singleton())
643 ColPartition_C_IT it(partners);
655 bounding_box_.
bottom()) ||
657 other->bounding_box_.
bottom())) {
664 memset(special_blobs_densities_, 0,
sizeof(special_blobs_densities_));
666 int w1 = boxes_.length(), w2 = other->boxes_.length();
667 float new_val = special_blobs_densities_[
type] * w1 +
668 other->special_blobs_densities_[
type] * w2;
670 special_blobs_densities_[
type] = new_val / (w1 + w2);
675 BLOBNBOX_C_IT it(&boxes_);
676 BLOBNBOX_C_IT it2(&other->boxes_);
677 for (; !it2.empty(); it2.forward()) {
680 if (prev_owner != other && prev_owner != NULL) {
684 ASSERT_HOST(prev_owner == other || prev_owner == NULL);
685 if (prev_owner == other)
687 it.add_to_end(bbox2);
689 left_margin_ =
MIN(left_margin_, other->left_margin_);
690 right_margin_ =
MAX(right_margin_, other->right_margin_);
691 if (other->left_key_ < left_key_) {
692 left_key_ = other->left_key_;
693 left_key_tab_ = other->left_key_tab_;
695 if (other->right_key_ > right_key_) {
696 right_key_ = other->right_key_;
697 right_key_tab_ = other->right_key_tab_;
702 flow_ = other->flow_;
703 blob_type_ = other->blob_type_;
707 boxes_.sort(SortByBoxBottom<BLOBNBOX>);
708 last_add_was_vertical_ =
true;
710 boxes_.sort(SortByBoxLeft<BLOBNBOX>);
711 last_add_was_vertical_ =
false;
716 for (
int upper = 0; upper < 2; ++upper) {
717 ColPartition_CLIST partners;
718 ColPartition_C_IT part_it(&partners);
719 part_it.add_list_after(upper ? &other->upper_partners_
720 : &other->lower_partners_);
721 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
746 int ok_box_overlap,
bool debug) {
750 tprintf(
"Vertical partition\n");
764 if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
765 merged_box.bottom() < bounding_box_.
top() - ok_box_overlap &&
766 merged_box.top() > bounding_box_.
bottom() + ok_box_overlap) {
768 tprintf(
"Excessive box overlap\n");
778 if (boxes_.empty() || boxes_.singleton())
780 BLOBNBOX_C_IT it(&boxes_);
781 TBOX left_box(it.data()->bounding_box());
782 for (it.forward(); !it.at_first(); it.forward()) {
785 if (left_box.overlap(box))
798 BLOBNBOX_C_IT it(&boxes_);
799 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
803 if (bbox == split_blob || !split_part->boxes_.empty()) {
804 split_part->
AddBox(it.extract());
816 right_key_tab_ =
false;
817 split_part->left_key_tab_ =
false;
832 if (split_x <= bounding_box_.
left() || split_x >= bounding_box_.
right())
836 BLOBNBOX_C_IT it(&boxes_);
837 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
842 if (box.
left() >= split_x) {
843 split_part->
AddBox(it.extract());
850 it.add_list_after(&split_part->boxes_);
859 right_key_tab_ =
false;
860 split_part->left_key_tab_ =
false;
861 right_margin_ = split_x;
862 split_part->left_margin_ = split_x;
870 bounding_box_ =
TBOX();
871 BLOBNBOX_C_IT it(&boxes_);
873 int non_leader_count = 0;
875 bounding_box_.
set_left(left_margin_);
880 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
892 tprintf(
"Computed left-illegal partition\n");
898 tprintf(
"Computed right-illegal partition\n");
905 median_top_ = bounding_box_.
top();
906 median_bottom_ = bounding_box_.
bottom();
907 median_size_ = bounding_box_.
height();
908 median_left_ = bounding_box_.
left();
909 median_right_ = bounding_box_.
right();
910 median_width_ = bounding_box_.
width();
913 STATS bottom_stats(bounding_box_.
bottom(), bounding_box_.
top() + 1);
915 STATS left_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
916 STATS right_stats(bounding_box_.
left(), bounding_box_.
right() + 1);
917 STATS width_stats(0, bounding_box_.
width() + 1);
918 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
922 int area = box.
area();
923 top_stats.add(box.
top(), area);
924 bottom_stats.add(box.
bottom(), area);
925 size_stats.add(box.
height(), area);
926 left_stats.add(box.
left(), area);
927 right_stats.add(box.
right(), area);
928 width_stats.add(box.
width(), area);
931 median_top_ =
static_cast<int>(top_stats.median() + 0.5);
932 median_bottom_ =
static_cast<int>(bottom_stats.median() + 0.5);
933 median_size_ =
static_cast<int>(size_stats.median() + 0.5);
934 median_left_ =
static_cast<int>(left_stats.median() + 0.5);
935 median_right_ =
static_cast<int>(right_stats.median() + 0.5);
936 median_width_ =
static_cast<int>(width_stats.median() + 0.5);
940 tprintf(
"Made partition with bad right coords");
944 tprintf(
"Made partition with bad left coords");
950 for (
int upper = 0; upper < 2; ++upper) {
951 ColPartition_CLIST partners;
952 ColPartition_C_IT part_it(&partners);
953 part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
954 for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
961 bounding_box_.
bottom())) {
962 tprintf(
"Recomputed box for partition %p\n",
this);
969 BLOBNBOX_C_IT it(&boxes_);
970 int overlap_count = 0;
971 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
976 return overlap_count;
982 int first_spanned_col = -1;
985 bounding_box_.
left(), bounding_box_.
right(),
987 MidY(), left_margin_, right_margin_,
988 &first_column_, &last_column_,
990 column_set_ = columns;
991 if (first_column_ < last_column_ && span_type ==
CST_PULLOUT &&
995 if (first_spanned_col >= 0) {
996 first_column_ = first_spanned_col;
997 last_column_ = first_spanned_col;
999 if ((first_column_ & 1) == 0)
1000 last_column_ = first_column_;
1001 else if ((last_column_ & 1) == 0)
1002 first_column_ = last_column_;
1004 first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1022 switch (blob_type_) {
1065 int* first_col,
int* last_col) {
1066 int first_spanned_col = -1;
1069 bounding_box_.
left(), bounding_box_.
right(),
1071 MidY(), left_margin_, right_margin_,
1072 first_col, last_col,
1073 &first_spanned_col);
1081 good_width_ = cb->
Run(width);
1082 good_column_ = blob_type_ ==
BRT_TEXT && left_key_tab_ && right_key_tab_;
1092 bool result =
false;
1094 int part_width = bounding_box_.
width();
1095 STATS gap_stats(0, part_width);
1096 STATS width_stats(0, part_width);
1097 BLOBNBOX_C_IT it(&boxes_);
1102 for (it.forward(); !it.at_first(); it.forward()) {
1107 width_stats.
add(right - left, 1);
1112 double median_gap = gap_stats.
median();
1116 double gap_iqr = gap_stats.
ile(0.75f) - gap_stats.
ile(0.25f);
1118 tprintf(
"gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1128 int offset =
static_cast<int>(ceil(gap_iqr * 2));
1129 int min_step =
static_cast<int>(median_gap +
median_width + 0.5);
1130 int max_step = min_step + offset;
1133 int part_left = bounding_box_.
left() - min_step / 2;
1134 part_width += min_step;
1136 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1141 for (
int x = left; x < right; ++x) {
1147 part_width, projection);
1148 if (best_end != NULL && best_end->
total_cost() < blob_count) {
1151 bool modified_blob_list =
false;
1152 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1156 if (it.at_first()) {
1157 int gap = it.data_relative(1)->bounding_box().
left() -
1161 modified_blob_list =
true;
1167 it.data_relative(-1)->bounding_box().right();
1170 modified_blob_list =
true;
1181 if (best_end == NULL) {
1188 delete [] projection;
1202 int good_blob_score_ = 0;
1203 int noisy_count = 0;
1204 int hline_count = 0;
1205 int vline_count = 0;
1206 BLOBNBOX_C_IT it(&boxes_);
1207 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1217 if (hline_count > vline_count) {
1220 }
else if (vline_count > hline_count) {
1223 }
else if (value < -1 || 1 < value) {
1227 long_side = bounding_box_.
width();
1228 short_side = bounding_box_.
height();
1231 long_side = bounding_box_.
height();
1232 short_side = bounding_box_.
width();
1248 if (flow_ ==
BTFT_CHAIN && strong_score == 3)
1256 if (noisy_count >= blob_count) {
1262 bounding_box_.
bottom())) {
1263 tprintf(
"RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1264 blob_count, noisy_count, good_blob_score_);
1265 tprintf(
" Projection value=%d, flow=%d, blob_type=%d\n",
1266 value, flow_, blob_type_);
1277 BLOBNBOX_C_IT it(&boxes_);
1278 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1295 int total_height = 0;
1297 int height_count = 0;
1299 BLOBNBOX_C_IT it(&boxes_);
1300 TBOX box(it.data()->bounding_box());
1306 ICOORD first_pt(box.right(), box.bottom());
1309 linepoints.
Add(first_pt);
1310 for (it.forward(); !it.at_last(); it.forward()) {
1313 ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1314 linepoints.
Add(box_pt);
1315 total_height += box.width();
1316 coverage += box.height();
1319 box = it.data()->bounding_box();
1320 ICOORD last_pt(box.right(), box.top());
1321 linepoints.
Add(last_pt);
1322 width = last_pt.y() - first_pt.y();
1326 TBOX box(it.data()->bounding_box());
1329 ICOORD first_pt(box.left(), box.bottom());
1330 linepoints.
Add(first_pt);
1331 for (it.forward(); !it.at_last(); it.forward()) {
1334 ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1335 linepoints.
Add(box_pt);
1336 total_height += box.height();
1337 coverage += box.width();
1340 box = it.data()->bounding_box();
1341 ICOORD last_pt(box.right(), box.bottom());
1342 linepoints.
Add(last_pt);
1343 width = last_pt.x() - first_pt.x();
1348 double error = linepoints.
Fit(&start_pt, &end_pt);
1356 ColPartition_LIST* used_parts,
1357 WorkingPartSet_LIST* working_sets) {
1360 block_owned_ =
true;
1361 WorkingPartSet_IT it(working_sets);
1364 if (partner != NULL && partner->working_set_ != NULL) {
1365 working_set_ = partner->working_set_;
1370 tprintf(
"Partition with partner has no working set!:");
1378 for (it.mark_cycle_pt(); !it.cycled_list() &&
1379 col_index != first_column_;
1380 it.forward(), ++col_index);
1382 tprintf(
"Match is %s for:", (col_index & 1) ?
"Real" :
"Between");
1386 tprintf(
"Target column=%d, only had %d\n", first_column_, col_index);
1389 work_set = it.data();
1392 if (!it.cycled_list() && last_column_ != first_column_ && !
IsPulloutType()) {
1394 BLOCK_LIST completed_blocks;
1395 TO_BLOCK_LIST to_blocks;
1396 for (; !it.cycled_list() && col_index <= last_column_;
1397 it.forward(), ++col_index) {
1400 &completed_blocks, &to_blocks);
1402 work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1404 working_set_ = work_set;
1416 ColPartition_LIST* block_parts,
1417 ColPartition_LIST* used_parts,
1418 BLOCK_LIST* completed_blocks,
1419 TO_BLOCK_LIST* to_blocks) {
1420 int page_height = tright.
y() - bleft.
y();
1422 ColPartition_IT it(block_parts);
1424 int max_line_height = 0;
1430 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1436 BLOBNBOX_C_IT blob_it(part->
boxes());
1437 int prev_bottom = blob_it.data()->bounding_box().bottom();
1438 for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1441 int step = bottom - prev_bottom;
1444 side_steps.add(step, 1);
1445 prev_bottom = bottom;
1447 part->
set_side_step(static_cast<int>(side_steps.median() + 0.5));
1448 if (!it.at_last()) {
1459 tprintf(
"side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1464 if (part_count == 0)
1467 SmoothSpacings(resolution, page_height, block_parts);
1470 BLOCK_IT block_it(completed_blocks);
1471 TO_BLOCK_IT to_block_it(to_blocks);
1472 ColPartition_LIST spacing_parts;
1473 ColPartition_IT sp_block_it(&spacing_parts);
1475 for (it.mark_cycle_pt(); !it.empty();) {
1477 sp_block_it.add_to_end(part);
1479 if (it.empty() || part->
bottom_spacing() > same_block_threshold ||
1480 !part->SpacingsEqual(*it.data(), resolution)) {
1483 if (!it.empty() && part->
bottom_spacing() <= same_block_threshold) {
1487 ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
1489 tprintf(
"Spacings unequal: upper:%d/%d, lower:%d/%d," 1490 " sizes %d %d %d\n",
1494 third_part != NULL ? third_part->
median_size() : 0);
1498 if (part->SizesSimilar(*next_part) &&
1505 if (third_part == NULL ||
1506 !next_part->SizesSimilar(*third_part) ||
1513 sp_block_it.add_to_end(it.extract());
1516 tprintf(
"Added line to current block.\n");
1522 if (to_block != NULL) {
1523 to_block_it.add_to_end(to_block);
1524 block_it.add_to_end(to_block->
block);
1526 sp_block_it.set_to_list(&spacing_parts);
1530 tprintf(
"Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1541 if (pos->
x() < bleft.
x())
1543 if (pos->
x() > tright.
x())
1545 if (pos->
y() < bleft.
y())
1547 if (pos->
y() > tright.
y())
1555 static TO_BLOCK* MoveBlobsToBlock(
bool vertical_text,
int line_spacing,
1557 ColPartition_LIST* block_parts,
1558 ColPartition_LIST* used_parts) {
1564 STATS sizes(0,
MAX(block_box.width(), block_box.height()));
1566 ColPartition_IT it(block_parts);
1568 BLOBNBOX_IT blob_it(&to_block->
blobs);
1569 ColPartition_IT used_it(used_parts);
1570 for (it.move_to_first(); !it.empty(); it.forward()) {
1571 ColPartition* part = it.extract();
1575 for (BLOBNBOX_C_IT bb_it(part->boxes()); !bb_it.empty();
1578 if (bblob->
owner() != part) {
1579 tprintf(
"Ownership incorrect for blob:");
1583 if (bblob->
owner() == NULL) {
1596 C_OUTLINE_IT ol_it(outlines);
1597 ASSERT_HOST(!text_type || ol_it.data()->pathlength() > 0);
1602 blob_it.add_after_then_move(bblob);
1604 used_it.add_to_end(part);
1606 if (text_type && blob_it.empty()) {
1612 if (vertical_text) {
1614 if (block_width < line_spacing)
1615 line_spacing = block_width;
1616 to_block->
line_spacing =
static_cast<float>(line_spacing);
1617 to_block->
max_blob_size =
static_cast<float>(block_width + 1);
1620 if (block_height < line_spacing)
1621 line_spacing = block_height;
1622 to_block->
line_spacing =
static_cast<float>(line_spacing);
1623 to_block->
max_blob_size =
static_cast<float>(block_height + 1);
1631 ColPartition_LIST* block_parts,
1632 ColPartition_LIST* used_parts) {
1633 if (block_parts->empty())
1639 ColPartition_IT it(block_parts);
1650 ICOORDELT_LIST vertices;
1651 ICOORDELT_IT vert_it(&vertices);
1660 ColPartition::LeftEdgeRun(&it, &start, &end);
1662 ColPartition::RightEdgeRun(&it, &start, &end);
1663 ClipCoord(bleft, tright, &start);
1664 ClipCoord(bleft, tright, &end);
1665 vert_it.add_after_then_move(
new ICOORDELT(start));
1666 vert_it.add_after_then_move(
new ICOORDELT(end));
1671 if ((iteration == 0 && it.at_first()) ||
1672 (iteration == 1 && it.at_last())) {
1676 }
while (iteration < 2);
1678 tprintf(
"Making block at (%d,%d)->(%d,%d)\n",
1679 min_x, min_y, max_x, max_y);
1680 BLOCK* block =
new BLOCK(
"",
true, 0, 0, min_x, min_y, max_x, max_y);
1682 return MoveBlobsToBlock(
false, line_spacing, block, block_parts, used_parts);
1689 ColPartition_LIST* block_parts,
1690 ColPartition_LIST* used_parts) {
1691 if (block_parts->empty())
1693 ColPartition_IT it(block_parts);
1696 int line_spacing = block_box.
width();
1698 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1699 block_box += it.data()->bounding_box();
1706 block_box.
right(), block_box.
top());
1708 return MoveBlobsToBlock(
true, line_spacing, block, block_parts, used_parts);
1714 BLOBNBOX_C_IT blob_it(&boxes_);
1718 for (; !blob_it.empty(); blob_it.forward()) {
1719 BLOBNBOX* blob = blob_it.extract();
1724 row =
new TO_ROW(blob, static_cast<float>(top),
1725 static_cast<float>(bottom),
1726 static_cast<float>(line_size));
1728 row->
add_blob(blob, static_cast<float>(top),
1729 static_cast<float>(bottom),
1730 static_cast<float>(line_size));
1740 part->left_margin_ = left_margin_;
1741 part->right_margin_ = right_margin_;
1742 part->bounding_box_ = bounding_box_;
1743 memcpy(part->special_blobs_densities_, special_blobs_densities_,
1744 sizeof(special_blobs_densities_));
1745 part->median_bottom_ = median_bottom_;
1746 part->median_top_ = median_top_;
1747 part->median_size_ = median_size_;
1748 part->median_left_ = median_left_;
1749 part->median_right_ = median_right_;
1750 part->median_width_ = median_width_;
1751 part->good_width_ = good_width_;
1752 part->good_column_ = good_column_;
1753 part->left_key_tab_ = left_key_tab_;
1754 part->right_key_tab_ = right_key_tab_;
1755 part->type_ = type_;
1756 part->flow_ = flow_;
1757 part->left_key_ = left_key_;
1758 part->right_key_ = right_key_;
1759 part->first_column_ = first_column_;
1760 part->last_column_ = last_column_;
1761 part->owns_blobs_ =
false;
1768 BLOBNBOX_C_IT inserter(copy->
boxes());
1769 BLOBNBOX_C_IT traverser(
boxes());
1770 for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1771 inserter.add_after_then_move(traverser.data());
1775 #ifndef GRAPHICS_DISABLED 1783 #endif // GRAPHICS_DISABLED 1786 static char kBlobTypes[
BRT_COUNT + 1] =
"NHSRIUVT";
1791 tprintf(
"ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)" 1792 " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d" 1793 " ts=%d bs=%d ls=%d rs=%d\n",
1794 boxes_.empty() ?
'E' :
' ',
1795 left_margin_, left_key_tab_ ?
'T' :
'B',
LeftAtY(y),
1796 bounding_box_.
left(), median_left_,
1797 bounding_box_.
bottom(), median_bottom_,
1798 bounding_box_.
right(),
RightAtY(y), right_key_tab_ ?
'T' :
'B',
1799 right_margin_, median_right_, bounding_box_.
top(), median_top_,
1800 good_width_, good_column_, type_,
1801 kBlobTypes[blob_type_], flow_,
1802 first_column_, last_column_, boxes_.length(),
1803 space_above_, space_below_, space_to_left_, space_to_right_);
1808 tprintf(
"Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1809 color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1810 color1_[L_ALPHA_CHANNEL],
1811 color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1816 STATS left_stats(0, working_set_count);
1817 STATS right_stats(0, working_set_count);
1822 if (partner->type_ > max_type)
1823 max_type = partner->type_;
1824 if (column_set_ == partner->column_set_) {
1825 left_stats.
add(partner->first_column_, 1);
1826 right_stats.
add(partner->last_column_, 1);
1834 first_column_ = left_stats.
mode();
1835 last_column_ = right_stats.
mode();
1836 if (last_column_ < first_column_)
1837 last_column_ = first_column_;
1842 partner->type_ = max_type;
1843 #if 0 // See TODO above 1844 if (column_set_ == partner->column_set_) {
1845 partner->first_column_ = first_column_;
1846 partner->last_column_ = last_column_;
1887 RefinePartnersInternal(
true, get_desperate, grid);
1888 RefinePartnersInternal(
false, get_desperate, grid);
1892 RefinePartnersByType(
true, &upper_partners_);
1893 RefinePartnersByType(
false, &lower_partners_);
1897 if (!upper_partners_.empty() && !upper_partners_.singleton())
1898 RefinePartnersByOverlap(
true, &upper_partners_);
1899 if (!lower_partners_.empty() && !lower_partners_.singleton())
1900 RefinePartnersByOverlap(
false, &lower_partners_);
1909 void ColPartition::RefinePartnersInternal(
bool upper,
bool get_desperate,
1911 ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
1912 if (!partners->empty() && !partners->singleton()) {
1913 RefinePartnersByType(upper, partners);
1914 if (!partners->empty() && !partners->singleton()) {
1916 RefinePartnerShortcuts(upper, partners);
1917 if (!partners->empty() && !partners->singleton()) {
1921 RefineTextPartnersByMerge(upper,
false, partners, grid);
1922 if (!partners->empty() && !partners->singleton())
1923 RefineTextPartnersByMerge(upper,
true, partners, grid);
1926 if (!partners->empty() && !partners->singleton())
1927 RefinePartnersByOverlap(upper, partners);
1936 void ColPartition::RefinePartnersByType(
bool upper,
1937 ColPartition_CLIST* partners) {
1941 tprintf(
"Refining %d %s partners by type for:\n",
1942 partners->length(), upper ?
"Upper" :
"Lower");
1945 ColPartition_C_IT it(partners);
1951 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1958 partner->RemovePartner(!upper,
this);
1967 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1975 partner->RemovePartner(!upper,
this);
1990 void ColPartition::RefinePartnerShortcuts(
bool upper,
1991 ColPartition_CLIST* partners) {
1992 bool done_any =
false;
1995 ColPartition_C_IT it(partners);
1996 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2000 ColPartition_C_IT it1(upper ? &a->upper_partners_ : &a->lower_partners_);
2001 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
2006 a->RemovePartner(!upper,
this);
2009 ColPartition_C_IT it2(partners);
2010 for (it2.mark_cycle_pt(); !it2.cycled_list(); it2.forward()) {
2015 b2->RemovePartner(!upper,
this);
2028 }
while (done_any && !partners->empty() && !partners->singleton());
2039 void ColPartition::RefineTextPartnersByMerge(
bool upper,
bool desperate,
2040 ColPartition_CLIST* partners,
2041 ColPartitionGrid* grid) {
2045 tprintf(
"Refining %d %s partners by merge for:\n",
2046 partners->length(), upper ?
"Upper" :
"Lower");
2049 while (!partners->empty() && !partners->singleton()) {
2052 ColPartition_C_IT it(partners);
2056 ColPartition_CLIST candidates;
2057 ColPartition_C_IT cand_it(&candidates);
2058 for (it.forward(); !it.at_first(); it.forward()) {
2060 if (part->first_column_ == candidate->last_column_ &&
2061 part->last_column_ == candidate->first_column_)
2062 cand_it.add_after_then_move(it.data());
2064 int overlap_increase;
2065 ColPartition* candidate = grid->BestMergeCandidate(part, &candidates, debug,
2066 NULL, &overlap_increase);
2067 if (candidate != NULL && (overlap_increase <= 0 || desperate)) {
2069 tprintf(
"Merging:hoverlap=%d, voverlap=%d, OLI=%d\n",
2070 part->HCoreOverlap(*candidate), part->VCoreOverlap(*candidate),
2074 grid->RemoveBBox(candidate);
2075 grid->RemoveBBox(part);
2076 part->Absorb(candidate, NULL);
2078 grid->InsertBBox(
true,
true, part);
2079 if (overlap_increase > 0)
2080 part->desperately_merged_ =
true;
2089 void ColPartition::RefinePartnersByOverlap(
bool upper,
2090 ColPartition_CLIST* partners) {
2094 tprintf(
"Refining %d %s partners by overlap for:\n",
2095 partners->length(), upper ?
"Upper" :
"Lower");
2098 ColPartition_C_IT it(partners);
2101 int best_overlap = 0;
2102 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2104 int overlap =
MIN(bounding_box_.
right(), partner->bounding_box_.right())
2105 -
MAX(bounding_box_.
left(), partner->bounding_box_.left());
2106 if (overlap > best_overlap) {
2107 best_overlap = overlap;
2108 best_partner = partner;
2112 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2114 if (partner != best_partner) {
2119 partner->RemovePartner(!upper,
this);
2126 bool ColPartition::ThisPartitionBetter(
BLOBNBOX* bbox,
2127 const ColPartition& other) {
2130 int left = box.
left();
2131 int right = box.
right();
2132 if (left < left_margin_ || right > right_margin_)
2134 if (left < other.left_margin_ || right > other.right_margin_)
2136 int top = box.
top();
2137 int bottom = box.
bottom();
2138 int this_overlap =
MIN(top, median_top_) -
MAX(bottom, median_bottom_);
2139 int other_overlap =
MIN(top, other.median_top_) -
2140 MAX(bottom, other.median_bottom_);
2141 int this_miss = median_top_ - median_bottom_ - this_overlap;
2142 int other_miss = other.median_top_ - other.median_bottom_ - other_overlap;
2144 tprintf(
"Unique on (%d,%d)->(%d,%d) overlap %d/%d, miss %d/%d, mt=%d/%d\n",
2146 this_overlap, other_overlap, this_miss, other_miss,
2147 median_top_, other.median_top_);
2149 if (this_miss < other_miss)
2151 if (this_miss > other_miss)
2153 if (this_overlap > other_overlap)
2155 if (this_overlap < other_overlap)
2157 return median_top_ >= other.median_top_;
2164 static int MedianSpacing(
int page_height, ColPartition_IT it) {
2165 STATS stats(0, page_height);
2166 while (!it.cycled_list()) {
2167 ColPartition* part = it.data();
2169 stats.add(part->bottom_spacing(), 1);
2170 stats.add(part->top_spacing(), 1);
2172 return static_cast<int>(stats.median() + 0.5);
2186 return (last_column_ >= part.first_column_) &&
2187 (first_column_ <= part.last_column_);
// Walks the partitions in parts with a sliding window (neighbourhood) of
// PN_COUNT partitions, detects where a run of consistent spacings ends, and
// averages the top and bottom spacings over that run.
// resolution is forwarded to the spacing comparison helpers and page_height
// to MedianSpacing.
// NOTE(review): several statements of this function are not visible in this
// listing (declarations of neighbourhood, upper and last_part, iterator
// advances, and the code that stores the averaged spacings) - the comments
// below describe only what the visible lines show.
2193 void ColPartition::SmoothSpacings(
int resolution,
int page_height,
2194 ColPartition_LIST* parts) {
2202 ColPartition_IT it(parts);
// The median is computed on a by-value copy of it, so it itself is not moved
// by this call.
2209 int median_space = MedianSpacing(page_height, it);
// start_it and end_it are copies of it; start_it later seeds the summing
// iterator and end_it the median recomputation.
2210 ColPartition_IT start_it(it);
2211 ColPartition_IT end_it(it);
// Fill the initial window: slots below PN_UPPER, and any slot reached after
// the list has cycled, are set to NULL; the others take it.data().
2212 for (
int i = 0; i < PN_COUNT; ++i) {
2213 if (i < PN_UPPER || it.cycled_list()) {
2214 neighbourhood[i] = NULL;
2218 neighbourhood[i] = it.data();
// Main loop: runs for as long as the PN_UPPER slot holds a partition.
2222 while (neighbourhood[PN_UPPER] != NULL) {
// Condition for ending the current run: there is no lower partition, or the
// upper/lower spacings differ and none of the OKSpacingBlip exceptions
// applies at the current window or at the window shifted by one either way.
// NOTE(review): neighbourhood - 1 and neighbourhood + 1 shift the window seen
// by OKSpacingBlip by one slot; this relies on OKSpacingBlip never indexing
// the first or last slot - confirm against the PN_* enum values.
2244 if (neighbourhood[PN_LOWER] == NULL ||
2245 (!neighbourhood[PN_UPPER]->SpacingsEqual(*neighbourhood[PN_LOWER],
2247 !OKSpacingBlip(resolution, median_space, neighbourhood) &&
2248 (!OKSpacingBlip(resolution, median_space, neighbourhood - 1) ||
2249 !neighbourhood[PN_LOWER]->SpacingEqual(median_space, resolution)) &&
2250 (!OKSpacingBlip(resolution, median_space, neighbourhood + 1) ||
2251 !neighbourhood[PN_UPPER]->SpacingEqual(median_space, resolution)))) {
// First pass over the run, starting from start_it: accumulate the bottom and
// top spacings of each partition until last_part is reached.
2254 ColPartition_IT sum_it(start_it);
2256 double total_bottom = 0.0;
2257 double total_top = 0.0;
2258 int total_count = 0;
2261 while (upper != last_part) {
2262 total_bottom += upper->bottom_spacing();
2263 total_top += upper->top_spacing();
2266 upper = sum_it.data();
// Only compute averages when at least one partition was counted; this also
// guards the divisions below against a zero count.
2268 if (total_count > 0) {
// Averages, with 0.5 added before the conversion to int.
2270 int top_spacing =
static_cast<int>(total_top / total_count + 0.5);
2271 int bottom_spacing =
static_cast<int>(total_bottom / total_count + 0.5);
// Debug output describing why the run ended and the spacings involved.
// NOTE(review): the condition guarding this output is not visible here.
2273 tprintf(
"Spacing run ended. Cause:");
2274 if (neighbourhood[PN_LOWER] == NULL) {
2277 tprintf(
"Spacing change. Spacings:\n");
2278 for (
int i = 0; i < PN_COUNT; ++i) {
2279 if (neighbourhood[i] == NULL) {
2281 if (i > 0 && neighbourhood[i - 1] != NULL) {
2286 tprintf(
" NULL lower partner:\n");
2292 tprintf(
"Top = %d, bottom = %d\n",
// Second pass over the same run.
// NOTE(review): presumably this is where the averaged spacings are written
// back to each partition; the loop body is not visible - confirm.
2301 upper = sum_it.data();
2302 while (upper != last_part) {
2310 upper = sum_it.data();
// Recompute the median for the partitions reached from end_it.
2317 median_space = MedianSpacing(page_height, end_it);
// Slide the window down by one slot and refill the last slot from it, using
// NULL once the list has cycled.
2320 for (
int j = 1; j < PN_COUNT; ++j) {
2321 neighbourhood[j - 1] = neighbourhood[j];
2323 if (it.cycled_list()) {
2324 neighbourhood[PN_COUNT - 1] = NULL;
2326 neighbourhood[PN_COUNT - 1] = it.data();
// Tests whether the spacings of parts[PN_UPPER] and parts[PN_LOWER] are
// acceptable when summed (via SummedSpacingOK against median_spacing) and at
// least one of the immediate neighbours parts[PN_ABOVE1] / parts[PN_BELOW1]
// exists and has spacing equal to median_spacing within the resolution-based
// margins.
// parts points at a window of partition pointers indexed by the PN_* values;
// callers may pass the window shifted by one slot.
2336 bool ColPartition::OKSpacingBlip(
int resolution,
int median_spacing,
2337 ColPartition** parts) {
// Guard: both the upper and lower partitions are dereferenced below.
// NOTE(review): the statement taken when either is NULL is not visible in
// this listing - presumably it returns false; confirm.
2338 if (parts[PN_UPPER] == NULL || parts[PN_LOWER] == NULL)
2342 return parts[PN_UPPER]->SummedSpacingOK(*parts[PN_LOWER],
2343 median_spacing, resolution) &&
2344 ((parts[PN_ABOVE1] != NULL &&
2345 parts[PN_ABOVE1]->SpacingEqual(median_spacing, resolution)) ||
2346 (parts[PN_BELOW1] != NULL &&
2347 parts[PN_BELOW1]->SpacingEqual(median_spacing, resolution)));
// Compares this partition's spacing against the given spacing value, using
// tolerances obtained from BottomSpacingMargin and TopSpacingMargin for the
// given resolution.
2352 bool ColPartition::SpacingEqual(
int spacing,
int resolution)
const {
2353 int bottom_error = BottomSpacingMargin(resolution);
2354 int top_error = TopSpacingMargin(resolution);
// NOTE(review): only the bottom_spacing_ term of the result is visible in
// this listing; presumably the continuation compares top_spacing_ using
// top_error - confirm.
2355 return NearlyEqual(bottom_spacing_, spacing, bottom_error) &&
// Compares the spacings of this partition with those of other.
// The tolerance for each of the bottom and top comparisons is the larger of
// the two partitions' margins for the given resolution.
2361 bool ColPartition::SpacingsEqual(
const ColPartition& other,
2362 int resolution)
const {
2363 int bottom_error =
MAX(BottomSpacingMargin(resolution),
2364 other.BottomSpacingMargin(resolution));
2365 int top_error =
MAX(TopSpacingMargin(resolution),
2366 other.TopSpacingMargin(resolution));
// The bottom spacings must be nearly equal, and either the top spacings are
// nearly equal or the sum of the two top spacings is nearly equal to twice
// this partition's bottom spacing.
// NOTE(review): the tolerance of the last comparison and the end of the
// expression are not visible in this listing.
2367 return NearlyEqual(bottom_spacing_, other.bottom_spacing_, bottom_error) &&
2368 (
NearlyEqual(top_spacing_, other.top_spacing_, top_error) ||
2369 NearlyEqual(top_spacing_ + other.top_spacing_, bottom_spacing_ * 2,
// Tests the summed spacings of this partition and other against spacing.
// The tolerance for each of the bottom and top comparisons is the larger of
// the two partitions' margins for the given resolution.
2376 bool ColPartition::SummedSpacingOK(
const ColPartition& other,
2377 int spacing,
int resolution)
const {
2378 int bottom_error =
MAX(BottomSpacingMargin(resolution),
2379 other.BottomSpacingMargin(resolution));
2380 int top_error =
MAX(TopSpacingMargin(resolution),
2381 other.TopSpacingMargin(resolution));
2382 int bottom_total = bottom_spacing_ + other.bottom_spacing_;
2383 int top_total = top_spacing_ + other.top_spacing_;
// The visible terms compare the summed bottom spacing against spacing and
// against spacing * 2.
// NOTE(review): the top_total terms and the operator joining the two
// parenthesised groups are not visible in this listing - presumably the two
// cases are alternatives; confirm.
2384 return (
NearlyEqual(spacing, bottom_total, bottom_error) &&
2386 (
NearlyEqual(spacing * 2, bottom_total, bottom_error) &&
// Returns the tolerance used when comparing bottom spacings; its callers
// pass the result as the tolerance argument of NearlyEqual.
// NOTE(review): the body is not visible in this listing, so how the value
// depends on resolution cannot be stated here.
2392 int ColPartition::BottomSpacingMargin(
int resolution)
const {
// Returns the tolerance used when comparing top spacings; its callers pass
// the result as the tolerance argument of NearlyEqual.
2398 int ColPartition::TopSpacingMargin(
int resolution)
const {
// The result is built from BottomSpacingMargin(resolution).
// NOTE(review): the start of this expression is not visible in this listing -
// presumably an extra term is combined with the bottom margin; confirm.
2400 BottomSpacingMargin(resolution);
// Tests whether the median sizes of this partition and other are within the
// ratio kMaxSizeRatio of each other.
2405 bool ColPartition::SizesSimilar(
const ColPartition& other)
const {
// Visible term: this median size must not exceed other's median size scaled
// by kMaxSizeRatio.
// NOTE(review): the second operand of && is not visible in this listing -
// presumably the symmetric test with the roles swapped; confirm.
2406 return median_size_ <= other.median_size_ *
kMaxSizeRatio &&
// Narrows the running interval [*margin_left, *margin_right] of sort keys by
// the interval spanned by part's left margin and the left side of its
// bounding box, provided the two intervals intersect.
// NOTE(review): the return statements are not visible in this listing;
// callers use the result as a loop-continuation condition, so presumably it
// is true when the interval was updated and false otherwise - confirm.
2413 static bool UpdateLeftMargin(
const ColPartition& part,
2414 int* margin_left,
int* margin_right) {
2415 const TBOX& part_box = part.bounding_box();
2416 int top = part_box.
top();
2417 int bottom = part_box.
bottom();
// Sort keys of the left margin and of the box's left side, each evaluated at
// the top and at the bottom of the bounding box.
2418 int tl_key = part.SortKey(part.left_margin(), top);
2419 int tr_key = part.SortKey(part_box.
left(), top);
2420 int bl_key = part.SortKey(part.left_margin(), bottom);
2421 int br_key = part.SortKey(part_box.
left(), bottom);
// Take the tightest interval: the larger of the margin keys and the smaller
// of the box keys.
2422 int left_key =
MAX(tl_key, bl_key);
2423 int right_key =
MIN(tr_key, br_key);
// If [left_key, right_key] intersects the running interval, shrink the
// running interval to the intersection.
2424 if (left_key <= *margin_right && right_key >= *margin_left) {
2426 *margin_right =
MIN(*margin_right, right_key);
2427 *margin_left =
MAX(*margin_left, left_key);
// Computes a run of partitions along the left edge, starting at the partition
// under part_it, and writes the run's two end points to start and end.
// part_it is moved by this function.
// NOTE(review): the rest of the signature, the declarations of part,
// start_part and the margin variables, and some iterator moves are not
// visible in this listing; comments below cover the visible lines only.
2438 void ColPartition::LeftEdgeRun(ColPartition_IT* part_it,
// The run starts at the top of the first partition, adjusted using the bottom
// of the previous partition when there is one: moved to that bottom if it is
// lower, or to the midpoint if it is higher.
2442 int start_y = part->bounding_box_.top();
2443 if (!part_it->at_first()) {
2444 int prev_bottom = part_it->data_relative(-1)->bounding_box_.bottom();
2445 if (prev_bottom < start_y)
2446 start_y = prev_bottom;
2447 else if (prev_bottom > start_y)
2448 start_y = (start_y + prev_bottom) / 2;
2450 int end_y = part->bounding_box_.bottom();
// Seed the running margin interval from the first partition, then keep
// taking partitions while UpdateLeftMargin accepts them and the iterator has
// not come back to the first element.
2453 UpdateLeftMargin(*part, &margin_left, &margin_right);
2456 part = part_it->data();
2457 }
while (!part_it->at_first() &&
2458 UpdateLeftMargin(*part, &margin_left, &margin_right));
// Start a second interval from the partition that ended the run.
2464 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right);
// If the next interval lies entirely to the right of the current one, look
// ahead with a copy of the iterator to build up the next interval, then step
// part_it backward while partitions still fit that interval, stopping at
// start_part.
2465 if (next_margin_left > margin_right) {
2466 ColPartition_IT next_it(*part_it);
2469 part = next_it.data();
2470 }
while (!next_it.at_first() &&
2471 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
2475 part_it->backward();
2476 part = part_it->data();
2477 }
while (part != start_part &&
2478 UpdateLeftMargin(*part, &next_margin_left, &next_margin_right));
// The run ends at the bottom of the partition before part_it, or at the
// midpoint with the top of the partition under part_it when that top is
// lower and part_it is not at the first element.
2482 part = part_it->data_relative(-1);
2483 end_y = part->bounding_box_.bottom();
2484 if (!part_it->at_first() && part_it->data()->bounding_box_.top() < end_y)
2485 end_y = (end_y + part_it->data()->bounding_box_.top()) / 2;
// Both end points take their x from margin_right evaluated at their y.
2486 start->
set_y(start_y);
2487 start->
set_x(part->XAtY(margin_right, start_y));
2489 end->
set_x(part->XAtY(margin_right, end_y));
// Debug output of the finished run.
// NOTE(review): the condition guarding this output is not visible here.
2491 tprintf(
"Left run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2492 start_y, end_y, part->XAtY(margin_left, end_y),
2493 end->
x(), part->left_margin_, part->bounding_box_.left());
// Narrows the running interval [*margin_left, *margin_right] of sort keys by
// the interval spanned by the right side of part's bounding box and part's
// right margin, provided the two intervals intersect.
// NOTE(review): the return statements are not visible in this listing;
// callers use the result as a loop-continuation condition, so presumably it
// is true when the interval was updated and false otherwise - confirm.
2499 static bool UpdateRightMargin(
const ColPartition& part,
2500 int* margin_left,
int* margin_right) {
2501 const TBOX& part_box = part.bounding_box();
2502 int top = part_box.
top();
2503 int bottom = part_box.
bottom();
// Sort keys of the box's right side and of the right margin, each evaluated
// at the top and at the bottom of the bounding box.
2504 int tl_key = part.SortKey(part_box.
right(), top);
2505 int tr_key = part.SortKey(part.right_margin(), top);
2506 int bl_key = part.SortKey(part_box.
right(), bottom);
2507 int br_key = part.SortKey(part.right_margin(), bottom);
// Take the tightest interval: the larger of the box keys and the smaller of
// the margin keys.
2508 int left_key =
MAX(tl_key, bl_key);
2509 int right_key =
MIN(tr_key, br_key);
// If [left_key, right_key] intersects the running interval, shrink the
// running interval to the intersection.
2510 if (left_key <= *margin_right && right_key >= *margin_left) {
2512 *margin_right =
MIN(*margin_right, right_key);
2513 *margin_left =
MAX(*margin_left, left_key);
// Computes a run of partitions along the right edge, starting at the
// partition under part_it and walking the list backward, and writes the
// run's two end points to start and end. part_it is moved by this function.
// NOTE(review): the rest of the signature, the declarations of part,
// start_part and the margin variables, and some iterator moves are not
// visible in this listing; comments below cover the visible lines only.
2525 void ColPartition::RightEdgeRun(ColPartition_IT* part_it,
// The run starts at the bottom of the first partition, adjusted using the top
// of the following partition when there is one; if that top is lower, the
// midpoint is used.
// NOTE(review): the statement for the next_y > start_y case is not visible.
2529 int start_y = part->bounding_box_.bottom();
2530 if (!part_it->at_last()) {
2531 int next_y = part_it->data_relative(1)->bounding_box_.top();
2532 if (next_y > start_y)
2534 else if (next_y < start_y)
2535 start_y = (start_y + next_y) / 2;
2537 int end_y = part->bounding_box_.top();
// Seed the running margin interval from the first partition, then step
// backward while UpdateRightMargin accepts the partitions and the iterator
// has not wrapped round to the last element.
2540 UpdateRightMargin(*part, &margin_left, &margin_right);
2542 part_it->backward();
2543 part = part_it->data();
2544 }
while (!part_it->at_last() &&
2545 UpdateRightMargin(*part, &margin_left, &margin_right));
// Start a second interval from the partition that ended the run.
2550 UpdateRightMargin(*part, &next_margin_left, &next_margin_right);
// If the next interval lies entirely to the left of the current one, look
// ahead with a copy of the iterator to build up the next interval, then
// re-walk with part_it while partitions still fit that interval, stopping at
// start_part.
2551 if (next_margin_right < margin_left) {
2552 ColPartition_IT next_it(*part_it);
2555 part = next_it.data();
2556 }
while (!next_it.at_last() &&
2557 UpdateRightMargin(*part, &next_margin_left,
2558 &next_margin_right));
2563 part = part_it->data();
2564 }
while (part != start_part &&
2565 UpdateRightMargin(*part, &next_margin_left,
2566 &next_margin_right));
2567 part_it->backward();
// The run ends at the top of the partition after part_it, or at the midpoint
// with the bottom of the partition under part_it when that bottom is higher
// and part_it is not at the last element.
2570 part = part_it->data_relative(1);
2571 end_y = part->bounding_box().top();
2572 if (!part_it->at_last() &&
2573 part_it->data()->bounding_box_.bottom() > end_y)
2574 end_y = (end_y + part_it->data()->bounding_box_.bottom()) / 2;
// Both end points take their x from margin_left evaluated at their y.
2575 start->
set_y(start_y);
2576 start->
set_x(part->XAtY(margin_left, start_y));
2578 end->
set_x(part->XAtY(margin_left, end_y));
// Debug output of the finished run.
// NOTE(review): the condition guarding this output is not visible here.
2580 tprintf(
"Right run from y=%d to %d terminated with sum %d-%d, new %d-%d\n",
2581 start_y, end_y, end->
x(), part->XAtY(margin_right, end_y),
2582 part->bounding_box_.right(), part->right_margin_);
void SetRightTab(const TabVector *tab_vector)
tesseract::ColPartition * owner() const
const TBOX & bounding_box() const
void set_flow(BlobTextFlowType f)
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
bool NearlyEqual(T x, T y, T tolerance)
bool overlap(const TBOX &box) const
void CopyRightTab(const ColPartition &src, bool take_box)
void set_bottom_spacing(int spacing)
inT64 CostWithVariance(const DPPoint *prev)
TBOX BoundsWithoutBox(BLOBNBOX *box)
bool IsVerticalType() const
void AddBox(BLOBNBOX *box)
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
bool ReleaseNonLeaderBoxes()
const int kMinLeaderCount
void set_owns_blobs(bool owns_blobs)
static bool WithinTestRegion(int detail_level, int x, int y)
void set_left_margin(int margin)
void set_block_owned(bool owned)
static bool DifferentSizes(int size1, int size2)
const int kMinStrongTextValue
const double kMaxBaselineError
ScrollView::Color BoxColor() const
const double kMaxSizeRatio
PolyBlockType type() const
void SmoothPartnerRun(int working_set_count)
const int kHorzStrongTextlineCount
int bottom_spacing() const
void add(inT32 value, inT32 count)
double Fit(ICOORD *pt1, ICOORD *pt2)
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
void Add(const ICOORD &pt)
bool MarkAsLeaderIfMonospaced()
void AddLocalCost(int new_cost)
#define ELIST2IZE(CLASSNAME)
void AddPartition(ColPartition *part)
void SetRegionAndFlowTypesFromProjectionValue(int value)
float SpecialBlobsDensity(const BlobSpecialTextType type) const
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
BlobTextFlowType flow() const
void ColumnRange(int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
const double kMaxLeaderGapFractionOfMin
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
void set_flow(BlobTextFlowType value)
const double kMaxLeaderGapFractionOfMax
C_OUTLINE_LIST * out_list()
const int kColumnWidthFactor
void AddToWorkingSet(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
void SetColumnGoodness(WidthCallback *cb)
const int kMaxColorDistance
bool MatchingTextColor(const ColPartition &other) const
const double kMaxSpacingDrift
PolyBlockType PartitionType(ColumnSpanningType flow) const
void set_side_step(int step)
static int SortByBBox(const void *p1, const void *p2)
void Absorb(ColPartition *other, WidthCallback *cb)
static void LineSpacingBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
POLY_BLOCK * poly_block() const
void set_right_margin(int margin)
void RemovePartner(bool upper, ColPartition *partner)
const double kMaxTopSpacingFraction
int base_char_top() const
void set_poly_block(POLY_BLOCK *blk)
set the poly block
void set_top_spacing(int spacing)
int NoisyNeighbours() const
int RightBlobRule() const
int SpecialBlobsCount(const BlobSpecialTextType type)
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
int XAtY(int sort_key, int y) const
inT16 x() const
access function
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
bool IsInSameColumnAs(const ColPartition &part) const
ColPartition * SingletonPartner(bool upper)
int RightAtY(int y) const
void CopyLeftTab(const ColPartition &src, bool take_box)
int base_char_bottom() const
void SetLeftTab(const TabVector *tab_vector)
bool ConfirmNoTabViolation(const ColPartition &other) const
void SetPartitionType(int resolution, ColPartitionSet *columns)
bool VSignificantCoreOverlap(const ColPartition &other) const
int median_bottom() const
void SetSpecialBlobsDensity(const BlobSpecialTextType type, const float density)
static double ColorDistanceFromLine(const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
const int kHorzStrongTextlineAspect
int VCoreOverlap(const ColPartition &other) const
ColPartition * ShallowCopy() const
int textord_debug_tabfind
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
static ColPartition * MakeLinePartition(BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
bool OKDiacriticMerge(const ColPartition &candidate, bool debug) const
const int kMinChainTextValue
bool MatchingColumns(const ColPartition &other) const
ColPartition * CopyButDontOwnBlobs()
void set_y(inT16 yin)
rewrite function
const int kMaxRMSColorNoise
void RefinePartners(PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
bool IsPulloutType() const
void set_x(inT16 xin)
rewrite function
BlobSpecialTextType special_text_type() const
const double kMaxSameBlockLineSpacing
double ile(double frac) const
void AddPartner(bool upper, ColPartition *partner)
void DisownBoxesNoAssert()
void RemoveBox(BLOBNBOX *box)
static C_BLOB * FakeBlob(const TBOX &box)
int CountOverlappingBoxes(const TBOX &box)
void set_region_type(BlobRegionType new_type)
BlobTextFlowType flow() const
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
ColPartition * SplitAtBlob(BLOBNBOX *split_blob)
void set_owner(tesseract::ColPartition *new_owner)
const TBOX & bounding_box() const
BlobRegionType region_type() const
const int kHorzStrongTextlineHeight
bool MatchingSizes(const ColPartition &other) const
ColPartition * SplitAt(int split_x)
const double kMinBaselineCoverage
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
BlobRegionType blob_type() const
#define CLISTIZE(CLASSNAME)
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
static ColPartition * FakePartition(const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
void set_type(PolyBlockType t)
BLOBNBOX * OverlapSplitBlob(const TBOX &box)
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
void ComputeSpecialBlobsDensity()
bool OKMergeOverlap(const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
inT16 y() const
access_function