21 #pragma warning(disable:4244) // Conversion warnings 58 "max fraction of mean blob width allowed for vertical gaps in vertical text");
61 "Fraction of box matches required to declare a line vertical");
68 TabConstraint_LIST* constraints =
new TabConstraint_LIST;
69 TabConstraint_IT it(constraints);
70 it.add_to_end(constraint);
72 vector->set_top_constraints(constraints);
74 vector->set_bottom_constraints(constraints);
79 TabConstraint_LIST* list2) {
85 tprintf(
"Testing constraint compatibility\n");
86 GetConstraints(list1, &y_min, &y_max);
87 GetConstraints(list2, &y_min, &y_max);
89 tprintf(
"Resulting range = [%d,%d]\n", y_min, y_max);
90 return y_max >= y_min;
96 TabConstraint_LIST* list2) {
99 TabConstraint_IT it(list2);
101 tprintf(
"Merging constraints\n");
103 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
106 constraint->vector_->
Print(
"Merge");
107 if (constraint->is_top_)
113 it.add_list_before(list2);
122 GetConstraints(constraints, &y_min, &y_max);
123 int y = (y_min + y_max) / 2;
124 TabConstraint_IT it(constraints);
125 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
128 if (constraint->is_top_) {
140 : vector_(vector), is_top_(is_top) {
142 y_min_ = vector->
endpt().
y();
151 void TabConstraint::GetConstraints(TabConstraint_LIST* constraints,
152 int* y_min,
int* y_max) {
153 TabConstraint_IT it(constraints);
154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
157 tprintf(
"Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
158 constraint->vector_->Print(
" for");
160 *y_min =
MAX(*y_min, constraint->y_min_);
161 *y_max =
MIN(*y_max, constraint->y_max_);
183 int extended_start_y,
int extended_end_y,
184 BLOBNBOX_CLIST* good_points,
185 int* vertical_x,
int* vertical_y) {
187 alignment, good_points);
188 if (!vector->
Fit(vertical,
false)) {
193 vertical = vector->endpt_ - vector->startpt_;
195 *vertical_x += vertical.
x() * weight;
196 *vertical_y += vertical.
y() * weight;
206 : extended_ymin_(src.extended_ymin_), extended_ymax_(src.extended_ymax_),
207 sort_key_(0), percent_score_(0), mean_width_(0),
208 needs_refit_(true), needs_evaluation_(true), intersects_other_lines_(false),
209 alignment_(alignment),
210 top_constraints_(NULL), bottom_constraints_(NULL) {
211 BLOBNBOX_C_IT it(&boxes_);
221 sort_key_ =
SortKey(vertical_skew,
222 (startpt_.
x() + endpt_.
x()) / 2,
223 (startpt_.
y() + endpt_.
y()) / 2);
225 Print(
"Constructed a new tab vector:");
235 copy->startpt_ = startpt_;
236 copy->endpt_ = endpt_;
237 copy->alignment_ = alignment_;
238 copy->extended_ymax_ = extended_ymax_;
239 copy->extended_ymin_ = extended_ymin_;
240 copy->intersects_other_lines_ = intersects_other_lines_;
248 BLOBNBOX_C_IT it(&boxes_);
252 while (!it.at_last() && box.
top() <= new_box.
top()) {
253 if (blob == new_blob)
259 if (box.
top() >= new_box.
top()) {
260 it.add_before_stay_put(new_blob);
266 it.add_after_stay_put(new_blob);
272 startpt_.
set_y(start_y);
282 startpt_.
rotate(rotation);
284 int dx = endpt_.
x() - startpt_.
x();
285 int dy = endpt_.
y() - startpt_.
y();
286 if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
306 TabVector_C_IT it(&partners_);
308 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
310 if (partner->top_constraints_ == NULL ||
311 partner->bottom_constraints_ == NULL) {
312 partner->
Print(
"Impossible: has no constraints");
313 Print(
"This vector has it as a partner");
316 if (prev_partner == NULL) {
319 partner->bottom_constraints_))
321 partner->bottom_constraints_);
325 partner->bottom_constraints_))
327 partner->bottom_constraints_);
329 prev_partner = partner;
333 partner->top_constraints_))
335 partner->top_constraints_);
343 partner->bottom_constraints_))
345 partner->bottom_constraints_);
347 partner->top_constraints_))
349 partner->top_constraints_);
354 if (top_constraints_ != NULL)
356 if (bottom_constraints_ != NULL)
362 TabVector_LIST* vectors,
364 TabVector_IT it1(vectors);
365 for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
367 TabVector_IT it2(it1);
368 for (it2.forward(); !it2.at_first(); it2.forward()) {
370 if (v2->
SimilarTo(vertical, *v1, grid)) {
374 v2->
Print(
"Merging");
375 v1->
Print(
"by deleting");
379 v2->
Print(
"Producing");
382 merged_vector -= v2->
startpt();
384 v2->
Print(
"Garbage result of merge?");
402 int v_scale = abs(vertical.
y());
422 sort_key_ < other.sort_key_) ?
this : &other;
423 int top_y = mover->endpt_.
y();
424 int bottom_y = mover->startpt_.
y();
425 int left =
MIN(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
426 int right =
MAX(mover->
XAtY(top_y), mover->
XAtY(bottom_y));
427 int shift = abs(sort_key_ - other.sort_key_) / v_scale;
439 if (box.
top() > bottom_y)
444 int right_at_box = left_at_box;
446 right_at_box += shift;
448 left_at_box -= shift;
459 extended_ymin_ =
MIN(extended_ymin_, other->extended_ymin_);
460 extended_ymax_ =
MAX(extended_ymax_, other->extended_ymax_);
462 alignment_ = other->alignment_;
465 BLOBNBOX_C_IT it1(&boxes_);
466 BLOBNBOX_C_IT it2(&other->boxes_);
467 while (!it2.empty()) {
473 while (box1.
bottom() < box2.
bottom() && !it1.at_last()) {
479 it1.add_to_end(bbox2);
480 }
else if (bbox1 != bbox2) {
481 it1.add_before_stay_put(bbox2);
495 TabVector_C_IT it(&partners_);
498 if (it.data() == partner)
501 it.add_after_then_move(partner);
506 TabVector_C_IT it(&partners_);
507 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
508 if (it.data() == other)
527 "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," 530 endpt_.
x(), endpt_.
y(), mean_width_, percent_score_, sort_key_,
531 boxes_.length(), partners_.length());
537 BLOBNBOX_C_IT it(&boxes_);
538 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
541 tprintf(
"Box at (%d,%d)->(%d,%d)\n",
548 #ifndef GRAPHICS_DISABLED 561 tab_win->
Line(startpt_.
x(), startpt_.
y(), endpt_.
x(), endpt_.
y());
563 tab_win->
Line(startpt_.
x(), startpt_.
y(), startpt_.
x(), extended_ymin_);
564 tab_win->
Line(endpt_.
x(), extended_ymax_, endpt_.
x(), endpt_.
y());
566 snprintf(score_buf,
sizeof(score_buf),
"%d", percent_score_);
568 tab_win->
Text(startpt_.
x(), startpt_.
y(), score_buf);
577 if (needs_evaluation_)
589 needs_evaluation_ =
false;
590 int length = endpt_.
y() - startpt_.
y();
591 if (length == 0 || boxes_.empty()) {
593 Print(
"Zero length in evaluate");
597 BLOBNBOX_C_IT it(&boxes_);
599 int height_count = 0;
600 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
603 int height = box.
height();
604 mean_height += height;
607 if (height_count > 0) mean_height /= height_count;
615 STATS gutters(0, max_gutter + 1);
619 int num_deleted_boxes = 0;
620 bool text_on_image =
false;
622 const TBOX* prev_good_box = NULL;
623 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
626 int mid_y = (box.
top() + box.
bottom()) / 2;
629 tprintf(
"After already deleting %d boxes, ", num_deleted_boxes);
630 Print(
"Starting evaluation");
638 int tab_x =
XAtY(mid_y);
642 bbox, &gutter_width, &neighbour_gap);
644 tprintf(
"Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n",
646 gutter_width, neighbour_gap);
652 gutters.
add(gutter_width, 1);
656 if (prev_good_box != NULL) {
657 int vertical_gap = box.
bottom() - prev_good_box->
top();
658 double size1 = sqrt(static_cast<double>(prev_good_box->
area()));
659 double size2 = sqrt(static_cast<double>(box.
area()));
661 good_length += vertical_gap;
663 tprintf(
"Box and prev good, gap=%d, target %g, goodlength=%d\n",
671 prev_good_box = &box;
673 text_on_image =
true;
677 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n",
679 gutter_width, neighbour_gap);
686 Print(
"Evaluating:");
691 int search_top = endpt_.
y();
692 int search_bottom = startpt_.
y();
695 prev_good_box = NULL;
696 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
699 int mid_y = (box.
top() + box.
bottom()) / 2;
703 int tab_x =
XAtY(mid_y);
713 bbox, &gutter_width, &neighbour_gap);
716 if (prev_good_box == NULL) {
719 search_bottom = box.
top();
721 prev_good_box = &box;
722 search_top = box.
bottom();
726 tprintf(
"Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n",
728 gutter_width, median_gutter);
736 if (prev_good_box != NULL) {
739 int length = endpt_.
y() - startpt_.
y();
740 percent_score_ = 100 * good_length / length;
741 if (num_deleted_boxes > 0) {
749 if (search_bottom > search_top) {
750 search_bottom = startpt_.
y();
751 search_top = endpt_.
y();
755 min_gutter_width *= mean_height;
757 if (median_gutter > max_gutter_width)
758 max_gutter_width = median_gutter;
759 int gutter_width = finder->
GutterWidth(search_bottom, search_top, *
this,
760 text_on_image, max_gutter_width,
762 if (gutter_width < min_gutter_width) {
764 tprintf(
"Rejecting bad tab Vector with %d gutter vs %g min\n",
765 gutter_width, min_gutter_width);
767 boxes_.shallow_clear();
770 tprintf(
"Final gutter %d, vs limit of %g, required shift = %d\n",
771 gutter_width, min_gutter_width, required_shift);
779 Print(
"Evaluation complete:");
789 needs_refit_ =
false;
790 if (boxes_.empty()) {
799 sort_key_ =
SortKey(vertical, midpt.
x(), midpt.
y());
800 return startpt_.
y() != endpt_.
y();
802 if (!force_parallel && !
IsRagged()) {
805 BLOBNBOX_C_IT it(&boxes_);
807 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
812 linepoints.
Add(boxpt);
815 linepoints.
Add(top_pt);
818 linepoints.
Fit(&startpt_, &endpt_);
819 if (startpt_.
y() != endpt_.
y()) {
821 vertical -= startpt_;
824 int start_y = startpt_.
y();
825 int end_y = endpt_.
y();
827 BLOBNBOX_C_IT it(&boxes_);
832 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
835 mean_width_ += box.
width();
840 int bottom_y = box.
bottom();
841 int top_y = box.
top();
842 int key =
SortKey(vertical, x1, bottom_y);
845 startpt_ =
ICOORD(x1, bottom_y);
847 key =
SortKey(vertical, x1, top_y);
850 startpt_ =
ICOORD(x1, top_y);
857 if (width_count > 0) {
858 mean_width_ = (mean_width_ + width_count - 1) / width_count;
860 endpt_ = startpt_ + vertical;
861 needs_evaluation_ =
true;
862 if (start_y != end_y) {
864 startpt_.
set_x(
XAtY(vertical, sort_key_, start_y));
865 startpt_.
set_y(start_y);
866 endpt_.
set_x(
XAtY(vertical, sort_key_, end_y));
875 if (!partners_.singleton())
877 TabVector_C_IT partner_it(&partners_);
885 if (!partners_.singleton())
887 TabVector_C_IT partner_it(&partners_);
889 BLOBNBOX_C_IT box_it1(&boxes_);
890 BLOBNBOX_C_IT box_it2(&partner->boxes_);
894 Print(
"Testing for vertical text");
895 partner->
Print(
" partner");
898 int num_unmatched = 0;
899 int total_widths = 0;
903 STATS gaps(0, width * 2);
905 box_it2.mark_cycle_pt();
906 for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
909 if (prev_bbox != NULL) {
912 while (!box_it2.cycled_list() && box_it2.data() != bbox &&
916 if (!box_it2.cycled_list() && box_it2.data() == bbox &&
922 total_widths += box.
width();
925 if (num_unmatched + num_matched == 0)
return NULL;
926 double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
928 int min_box_match =
static_cast<int>((num_matched + num_unmatched) *
930 bool is_vertical = (gaps.
get_total() > 0 &&
931 num_matched >= min_box_match &&
932 gaps.
median() <= max_gap);
934 tprintf(
"gaps=%d, matched=%d, unmatched=%d, min_match=%d " 935 "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
936 gaps.
get_total(), num_matched, num_unmatched, min_box_match,
937 gaps.
median(), avg_width, max_gap, is_vertical?
"Yes":
"No");
939 return (is_vertical) ? partner : NULL;
945 : extended_ymin_(extended_ymin), extended_ymax_(extended_ymax),
946 sort_key_(0), percent_score_(0), mean_width_(0),
947 needs_refit_(true), needs_evaluation_(true), alignment_(alignment),
948 top_constraints_(NULL), bottom_constraints_(NULL) {
949 BLOBNBOX_C_IT it(&boxes_);
950 it.add_list_after(boxes);
956 void TabVector::Delete(TabVector* replacement) {
957 TabVector_C_IT it(&partners_);
958 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
960 TabVector_C_IT p_it(&partner->partners_);
963 TabVector* partner_replacement = replacement;
964 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
966 if (p_partner == partner_replacement) {
967 partner_replacement = NULL;
972 for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
974 if (p_partner ==
this) {
976 if (partner_replacement != NULL)
977 p_it.add_before_stay_put(partner_replacement);
980 if (partner_replacement != NULL) {
981 partner_replacement->AddPartner(partner);
const TBOX & bounding_box() const
void AddPartner(TabVector *partner)
static void CreateConstraint(TabVector *vector, bool is_top)
const int kGutterToNeighbourRatio
void Display(ScrollView *tab_win)
int IntCastRounded(double x)
const ICOORD & topright() const
void set_bottom_constraints(TabConstraint_LIST *constraints)
static bool WithinTestRegion(int detail_level, int x, int y)
const int kGutterMultiple
BBC * NextVerticalSearch(bool top_to_bottom)
static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
const int kSimilarRaggedDist
double textord_tabvector_vertical_gap_fraction
void Line(int x1, int y1, int x2, int y2)
void add(inT32 value, inT32 count)
double Fit(ICOORD *pt1, ICOORD *pt2)
void Add(const ICOORD &pt)
#define ELIST2IZE(CLASSNAME)
void StartVerticalSearch(int xmin, int xmax, int y)
const int kSimilarVectorDist
const double kMinRaggedGutter
BlobTextFlowType flow() const
void MergeWith(const ICOORD &vertical, TabVector *other)
void set_top_constraints(TabConstraint_LIST *constraints)
void TextAttributes(const char *font, int pixel_size, bool bold, bool italic, bool underlined)
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
static int SortKey(const ICOORD &vertical, int x, int y)
void Evaluate(const ICOORD &vertical, TabFind *finder)
int ExtendedOverlap(int top_y, int bottom_y) const
void Debug(const char *prefix)
void Text(int x, int y, const char *mystring)
static void ApplyConstraints(TabConstraint_LIST *constraints)
TabVector * GetSinglePartner()
bool Fit(ICOORD vertical, bool force_parallel)
int extended_ymin() const
void SetYStart(int start_y)
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2)
inT16 x() const
access function
void Rotate(const FCOORD &rotation)
void Print(const char *prefix)
bool IsAPartner(const TabVector *other)
static TabVector * FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
void rotate(const FCOORD &vec)
bool textord_debug_printable
int GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
int textord_debug_tabfind
double textord_tabvector_vertical_box_ratio
TabVector * VerticalTextlinePartner()
const int kMaxFillinMultiple
TabVector * ShallowCopy() const
const double kMinAlignedGutter
void set_y(inT16 yin)
rewrite function
const double kLineCountReciprocal
void SetupPartnerConstraints()
const ICOORD & startpt() const
void set_x(inT16 xin)
rewrite function
const ICOORD & endpt() const
void GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
const double kMinGutterFraction
#define double_VAR(name, val, comment)
BlobRegionType region_type() const
#define CLISTIZE(CLASSNAME)
void ExtendToBox(BLOBNBOX *blob)
int extended_ymax() const
const ICOORD & botleft() const
const char * kAlignmentNames[]
inT16 y() const
access_function