tesseract  3.05.02
tabvector.h
Go to the documentation of this file.
1 // File: tabvector.h
3 // Description: Class to hold a near-vertical vector representing a tab-stop.
4 // Author: Ray Smith
5 // Created: Thu Apr 10 16:25:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_TABVECTOR_H__
21 #define TESSERACT_TEXTORD_TABVECTOR_H__
22 
23 #include "blobgrid.h"
24 #include "clst.h"
25 #include "elst.h"
26 #include "elst2.h"
27 #include "rect.h"
28 #include "bbgrid.h"
29 
30 class BLOBNBOX;
31 class ScrollView;
32 
33 namespace tesseract {
34 
35 
37  "Max fraction of mean blob width allowed for vertical gaps in vertical text");
39  "Fraction of box matches required to declare a line vertical");
40 
41 // The alignment type that a tab vector represents.
42 // Keep this enum synced with kAlignmentNames in tabvector.cpp.
51 };
52 
53 // Forward declarations. The classes use their own list types, so we
54 // need to make the list types first.
55 class TabFind;
56 class TabVector;
57 class TabConstraint;
58 
59 ELIST2IZEH(TabVector)
60 CLISTIZEH(TabVector)
61 ELISTIZEH(TabConstraint)
62 
63 // TabConstraint is a totally self-contained class to maintain
64 // a list of [min,max] constraints, each referring to a TabVector.
65 // The constraints are manipulated through static methods that act
66 // on a list of constraints. The list itself is cooperatively owned
67 // by the TabVectors of the constraints on the list and managed
68 // by implicit reference counting via the elements of the list.
69 class TabConstraint : public ELIST_LINK {
70  public:
72  // This empty constructor is here only so that the class can be ELISTIZED.
73  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
74  // and eliminate CLASSNAME##_copier.
75  }
76 
77  // Create a constraint for the top or bottom of this TabVector.
78  static void CreateConstraint(TabVector* vector, bool is_top);
79 
80  // Test to see if the constraints are compatible enough to merge.
81  static bool CompatibleConstraints(TabConstraint_LIST* list1,
82  TabConstraint_LIST* list2);
83 
84  // Merge the lists of constraints and update the TabVector pointers.
85  // The second list is deleted.
86  static void MergeConstraints(TabConstraint_LIST* list1,
87  TabConstraint_LIST* list2);
88 
89  // Set all the tops and bottoms as appropriate to a mean of the
90  // constrained range. Delete all the constraints and list.
91  static void ApplyConstraints(TabConstraint_LIST* constraints);
92 
93  private:
94  TabConstraint(TabVector* vector, bool is_top);
95 
96  // Get the max of the mins and the min of the maxes.
97  static void GetConstraints(TabConstraint_LIST* constraints,
98  int* y_min, int* y_max);
99 
100  // The TabVector this constraint applies to.
101  TabVector* vector_;
102  // If true then we refer to the top of the vector_.
103  bool is_top_;
104  // The allowed range of this vector_.
105  int y_min_;
106  int y_max_;
107 };
108 
109 // Class to hold information about a single vector
110 // that represents a tab stop or a rule line.
111 class TabVector : public ELIST2_LINK {
112  public:
114  // TODO(rays) fix this in elst.h line 1076, where it should use the
115  // copy constructor instead of operator=.
116  }
117  ~TabVector();
118 
119  // Public factory to build a TabVector from a list of boxes.
120  // The TabVector will be of the given alignment type.
121  // The input vertical vector is used in fitting, and the output
122  // vertical_x, vertical_y have the resulting line vector added to them
123  // if the alignment is not ragged.
124  // The extended_start_y and extended_end_y are the maximum possible
125  // extension to the line segment that can be used to align with others.
126  // The input CLIST of BLOBNBOX good_points is consumed and taken over.
127  static TabVector* FitVector(TabAlignment alignment, ICOORD vertical,
128  int extended_start_y, int extended_end_y,
129  BLOBNBOX_CLIST* good_points,
130  int* vertical_x, int* vertical_y);
131 
132  // Build a ragged TabVector by copying another's direction, shifting it
133  // to match the given blob, and making its initial extent the height
134  // of the blob, but its extended bounds from the bounds of the original.
135  TabVector(const TabVector& src, TabAlignment alignment,
136  const ICOORD& vertical_skew, BLOBNBOX* blob);
137 
138  // Copies basic attributes of a tab vector for simple operations.
139  // Copies things such startpt, endpt, range, width.
140  // Does not copy things such as partners, boxes, or constraints.
141  // This is useful if you only need vector information for processing, such
142  // as in the table detection code.
143  TabVector* ShallowCopy() const;
144 
145  // Simple accessors.
146  const ICOORD& startpt() const {
147  return startpt_;
148  }
149  const ICOORD& endpt() const {
150  return endpt_;
151  }
152  int extended_ymax() const {
153  return extended_ymax_;
154  }
155  int extended_ymin() const {
156  return extended_ymin_;
157  }
158  int sort_key() const {
159  return sort_key_;
160  }
161  int mean_width() const {
162  return mean_width_;
163  }
164  void set_top_constraints(TabConstraint_LIST* constraints) {
165  top_constraints_ = constraints;
166  }
167  void set_bottom_constraints(TabConstraint_LIST* constraints) {
168  bottom_constraints_ = constraints;
169  }
170  TabVector_CLIST* partners() {
171  return &partners_;
172  }
173  void set_startpt(const ICOORD& start) {
174  startpt_ = start;
175  }
176  void set_endpt(const ICOORD& end) {
177  endpt_ = end;
178  }
179  bool intersects_other_lines() const {
180  return intersects_other_lines_;
181  }
182  void set_intersects_other_lines(bool value) {
183  intersects_other_lines_ = value;
184  }
185 
186  // Inline quasi-accessors that require some computation.
187 
188  // Compute the x coordinate at the given y coordinate.
189  int XAtY(int y) const {
190  int height = endpt_.y() - startpt_.y();
191  if (height != 0)
192  return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height +
193  startpt_.x();
194  else
195  return startpt_.x();
196  }
197 
198  // Compute the vertical overlap with the other TabVector.
199  int VOverlap(const TabVector& other) const {
200  return MIN(other.endpt_.y(), endpt_.y()) -
201  MAX(other.startpt_.y(), startpt_.y());
202  }
203  // Compute the vertical overlap with the given y bounds.
204  int VOverlap(int top_y, int bottom_y) const {
205  return MIN(top_y, endpt_.y()) - MAX(bottom_y, startpt_.y());
206  }
207  // Compute the extended vertical overlap with the given y bounds.
208  int ExtendedOverlap(int top_y, int bottom_y) const {
209  return MIN(top_y, extended_ymax_) - MAX(bottom_y, extended_ymin_);
210  }
211 
212  // Return true if this is a left tab stop, either aligned, or ragged.
213  bool IsLeftTab() const {
214  return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
215  }
216  // Return true if this is a right tab stop, either aligned, or ragged.
217  bool IsRightTab() const {
218  return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
219  }
220  // Return true if this is a separator.
221  bool IsSeparator() const {
222  return alignment_ == TA_SEPARATOR;
223  }
224  // Return true if this is a center aligned tab stop.
225  bool IsCenterTab() const {
226  return alignment_ == TA_CENTER_JUSTIFIED;
227  }
228  // Return true if this is a ragged tab top, either left or right.
229  bool IsRagged() const {
230  return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
231  }
232 
233  // Return true if this vector is to the left of the other in terms
234  // of sort_key_.
235  bool IsLeftOf(const TabVector& other) const {
236  return sort_key_ < other.sort_key_;
237  }
238 
239  // Return true if the vector has no partners.
240  bool Partnerless() {
241  return partners_.empty();
242  }
243 
244  // Return the number of tab boxes in this vector.
245  int BoxCount() {
246  return boxes_.length();
247  }
248 
249  // Lock the vector from refits by clearing the boxes_ list.
250  void Freeze() {
251  boxes_.shallow_clear();
252  }
253 
254  // Flip x and y on the ends so a vector can be created from flipped input.
255  void XYFlip() {
256  int x = startpt_.y();
257  startpt_.set_y(startpt_.x());
258  startpt_.set_x(x);
259  x = endpt_.y();
260  endpt_.set_y(endpt_.x());
261  endpt_.set_x(x);
262  }
263 
264  // Reflect the tab vector in the y-axis.
265  void ReflectInYAxis() {
266  startpt_.set_x(-startpt_.x());
267  endpt_.set_x(-endpt_.x());
268  sort_key_ = -sort_key_;
269  if (alignment_ == TA_LEFT_ALIGNED)
270  alignment_ = TA_RIGHT_ALIGNED;
271  else if (alignment_ == TA_RIGHT_ALIGNED)
272  alignment_ = TA_LEFT_ALIGNED;
273  if (alignment_ == TA_LEFT_RAGGED)
274  alignment_ = TA_RIGHT_RAGGED;
275  else if (alignment_ == TA_RIGHT_RAGGED)
276  alignment_ = TA_LEFT_RAGGED;
277  }
278 
279  // Separate function to compute the sort key for a given coordinate pair.
280  static int SortKey(const ICOORD& vertical, int x, int y) {
281  ICOORD pt(x, y);
282  return pt * vertical;
283  }
284 
285  // Return the x at the given y for the given sort key.
286  static int XAtY(const ICOORD& vertical, int sort_key, int y) {
287  if (vertical.y() != 0)
288  return (vertical.x() * y + sort_key) / vertical.y();
289  else
290  return sort_key;
291  }
292 
293  // Sort function for E2LIST::sort to sort by sort_key_.
294  static int SortVectorsByKey(const void* v1, const void* v2) {
295  const TabVector* tv1 = *reinterpret_cast<const TabVector* const *>(v1);
296  const TabVector* tv2 = *reinterpret_cast<const TabVector* const *>(v2);
297  return tv1->sort_key_ - tv2->sort_key_;
298  }
299 
300  // More complex members.
301 
302  // Extend this vector to include the supplied blob if it doesn't
303  // already have it.
304  void ExtendToBox(BLOBNBOX* blob);
305 
306  // Set the ycoord of the start and move the xcoord to match.
307  void SetYStart(int start_y);
308  // Set the ycoord of the end and move the xcoord to match.
309  void SetYEnd(int end_y);
310 
311  // Rotate the ends by the given vector.
312  void Rotate(const FCOORD& rotation);
313 
314  // Setup the initial constraints, being the limits of
315  // the vector and the extended ends.
316  void SetupConstraints();
317 
318  // Setup the constraints between the partners of this TabVector.
320 
321  // Setup the constraints between this and its partner.
322  void SetupPartnerConstraints(TabVector* partner);
323 
324  // Use the constraints to modify the top and bottom.
325  void ApplyConstraints();
326 
327  // Merge close tab vectors of the same side that overlap.
328  static void MergeSimilarTabVectors(const ICOORD& vertical,
329  TabVector_LIST* vectors, BlobGrid* grid);
330 
331  // Return true if this vector is the same side, overlaps, and close
332  // enough to the other to be merged.
333  bool SimilarTo(const ICOORD& vertical,
334  const TabVector& other, BlobGrid* grid) const;
335 
336  // Eat the other TabVector into this and delete it.
337  void MergeWith(const ICOORD& vertical, TabVector* other);
338 
339  // Add a new element to the list of partner TabVectors.
340  // Partners must be added in order of increasing y coordinate of the text line
341  // that makes them partners.
342  // Groups of identical partners are merged into one.
343  void AddPartner(TabVector* partner);
344 
345  // Return true if other is a partner of this.
346  bool IsAPartner(const TabVector* other);
347 
348  // Print basic information about this tab vector.
349  void Print(const char* prefix);
350 
351  // Print basic information about this tab vector and every box in it.
352  void Debug(const char* prefix);
353 
354  // Draw this tabvector in place in the given window.
355  void Display(ScrollView* tab_win);
356 
357  // Refit the line and/or re-evaluate the vector if the dirty flags are set.
358  void FitAndEvaluateIfNeeded(const ICOORD& vertical, TabFind* finder);
359 
360  // Evaluate the vector in terms of coverage of its length by good-looking
361  // box edges. A good looking box is one where its nearest neighbour on the
362  // inside is nearer than half the distance its nearest neighbour on the
363  // outside of the putative column. Bad boxes are removed from the line.
364  // A second pass then further filters boxes by requiring that the gutter
365  // width be a minimum fraction of the mean gutter along the line.
366  void Evaluate(const ICOORD& vertical, TabFind* finder);
367 
368  // (Re)Fit a line to the stored points. Returns false if the line
369  // is degenerate. Althougth the TabVector code mostly doesn't care about the
370  // direction of lines, XAtY would give silly results for a horizontal line.
371  // The class is mostly aimed at use for vertical lines representing
372  // horizontal tab stops.
373  bool Fit(ICOORD vertical, bool force_parallel);
374 
375  // Return the partner of this TabVector if the vector qualifies as
376  // being a vertical text line, otherwise NULL.
378 
379  // Return the matching tabvector if there is exactly one partner, or
380  // NULL otherwise. This can be used after matching is done, eg. by
381  // VerticalTextlinePartner(), without checking if the line is vertical.
383 
384  private:
385  // Constructor is private as the static factory is the external way
386  // to build a TabVector.
388  TabAlignment alignment, BLOBNBOX_CLIST* boxes);
389 
390  // Delete this, but first, repoint all the partners to point to
391  // replacement. If replacement is NULL, then partner relationships
392  // are removed.
393  void Delete(TabVector* replacement);
394 
395  private:
396  // The bottom of the tab line.
397  ICOORD startpt_;
398  // The top of the tab line.
399  ICOORD endpt_;
400  // The lowest y that the vector might extend to.
401  int extended_ymin_;
402  // The highest y that the vector might extend to.
403  int extended_ymax_;
404  // Perpendicular distance of vector from a given vertical for sorting.
405  int sort_key_;
406  // Result of Evaluate 0-100. Coverage of line with good boxes.
407  int percent_score_;
408  // The mean width of the blobs. Meaningful only for separator lines.
409  int mean_width_;
410  // True if the boxes_ list has been modified, so a refit is needed.
411  bool needs_refit_;
412  // True if a fit has been done, so re-evaluation is needed.
413  bool needs_evaluation_;
414  // True if a separator line intersects at least 2 other lines.
415  bool intersects_other_lines_;
416  // The type of this TabVector.
417  TabAlignment alignment_;
418  // The list of boxes whose edges are aligned at this TabVector.
419  BLOBNBOX_CLIST boxes_;
420  // List of TabVectors that have a connection with this via a text line.
421  TabVector_CLIST partners_;
422  // Constraints used to resolve the exact location of the top and bottom
423  // of the tab line.
424  TabConstraint_LIST* top_constraints_;
425  TabConstraint_LIST* bottom_constraints_;
426 };
427 
428 } // namespace tesseract.
429 
430 #endif // TESSERACT_TEXTORD_TABVECTOR_H__
void AddPartner(TabVector *partner)
Definition: tabvector.cpp:492
void Display(ScrollView *tab_win)
Definition: tabvector.cpp:547
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:901
void set_intersects_other_lines(bool value)
Definition: tabvector.h:182
bool IsSeparator() const
Definition: tabvector.h:221
bool intersects_other_lines() const
Definition: tabvector.h:179
void set_bottom_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:167
integer coordinate
Definition: points.h:30
double textord_tabvector_vertical_gap_fraction
Definition: tabvector.cpp:58
#define MIN(x, y)
Definition: ndminx.h:28
static int SortVectorsByKey(const void *v1, const void *v2)
Definition: tabvector.h:294
void MergeWith(const ICOORD &vertical, TabVector *other)
Definition: tabvector.cpp:458
void set_top_constraints(TabConstraint_LIST *constraints)
Definition: tabvector.h:164
int mean_width() const
Definition: tabvector.h:161
void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:573
bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const
Definition: tabvector.cpp:394
bool IsLeftOf(const TabVector &other) const
Definition: tabvector.h:235
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
void Evaluate(const ICOORD &vertical, TabFind *finder)
Definition: tabvector.cpp:587
int ExtendedOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:208
void Debug(const char *prefix)
Definition: tabvector.cpp:535
bool IsRightTab() const
Definition: tabvector.h:217
TabVector * GetSinglePartner()
Definition: tabvector.cpp:874
int VOverlap(const TabVector &other) const
Definition: tabvector.h:199
bool Fit(ICOORD vertical, bool force_parallel)
Definition: tabvector.cpp:788
void set_startpt(const ICOORD &start)
Definition: tabvector.h:173
int extended_ymin() const
Definition: tabvector.h:155
void SetYStart(int start_y)
Definition: tabvector.cpp:270
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
inT16 x() const
access function
Definition: points.h:52
void Rotate(const FCOORD &rotation)
Definition: tabvector.cpp:281
bool IsCenterTab() const
Definition: tabvector.h:225
void Print(const char *prefix)
Definition: tabvector.cpp:525
#define ELIST2IZEH(CLASSNAME)
Definition: elst2.h:950
bool IsAPartner(const TabVector *other)
Definition: tabvector.cpp:505
TabVector_CLIST * partners()
Definition: tabvector.h:170
static TabVector * FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, int *vertical_y)
Definition: tabvector.cpp:182
#define MAX(x, y)
Definition: ndminx.h:24
double textord_tabvector_vertical_box_ratio
Definition: tabvector.cpp:61
TabVector * VerticalTextlinePartner()
Definition: tabvector.cpp:884
int XAtY(int y) const
Definition: tabvector.h:189
Definition: points.h:189
TabVector * ShallowCopy() const
Definition: tabvector.cpp:233
bool IsRagged() const
Definition: tabvector.h:229
int sort_key() const
Definition: tabvector.h:158
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
void SetupPartnerConstraints()
Definition: tabvector.cpp:302
const ICOORD & startpt() const
Definition: tabvector.h:146
ELISTIZEH(AmbigSpec)
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
const ICOORD & endpt() const
Definition: tabvector.h:149
void SetYEnd(int end_y)
Definition: tabvector.cpp:275
bool IsLeftTab() const
Definition: tabvector.h:213
int VOverlap(int top_y, int bottom_y) const
Definition: tabvector.h:204
static int XAtY(const ICOORD &vertical, int sort_key, int y)
Definition: tabvector.h:286
void ExtendToBox(BLOBNBOX *blob)
Definition: tabvector.cpp:246
int extended_ymax() const
Definition: tabvector.h:152
#define double_VAR_H(name, val, comment)
Definition: params.h:274
void set_endpt(const ICOORD &end)
Definition: tabvector.h:176
inT16 y() const
access_function
Definition: points.h:56