tesseract  3.05.02
tablefind.h
Go to the documentation of this file.
1 // File: tablefind.h
3 // Description: Helper classes to find tables from ColPartitions.
4 // Author: Faisal Shafait (faisal.shafait@dfki.de)
5 // Created: Tue Jan 06 11:13:01 PST 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_TABLEFIND_H__
21 #define TESSERACT_TEXTORD_TABLEFIND_H__
22 
23 #include "colpartitiongrid.h"
24 #include "elst.h"
25 #include "rect.h"
26 
27 namespace tesseract {
28 
29 // Possible types for a column segment.
// NOTE(review): the embedded listing numbers jump from 30 to 36 here, so the
// enumerator lines appear to be missing from this copy. Restore them from the
// original tablefind.h rather than guessing their names or order.
30 enum ColSegType {
36 };
37 
38 class ColPartitionSet;
39 
40 // ColSegment holds rectangular blocks that represent segmentation of a page
41 // into regions containing single column text/table.
// The class is forward-declared first so that the list macros below can name
// ColSegment before the class body appears.
// NOTE(review): ELISTIZEH/CLISTIZEH presumably declare the ColSegment_LIST,
// ColSegment_CLIST and ColSegment_C_IT types used later in this header --
// confirm against elst.h / clst.h.
42 class ColSegment;
43 ELISTIZEH(ColSegment)
44 CLISTIZEH(ColSegment)
45 
46 class ColSegment : public ELIST_LINK {
47  public:
48  ColSegment();
49  ~ColSegment();
50 
51  // Simple accessors and mutators
    // Read-only access to the stored bounding box.
52  const TBOX& bounding_box() const {
53  return bounding_box_;
54  }
55 
    // The four edge setters below forward directly to the matching TBOX
    // setter on bounding_box_; each changes one edge only.
56  void set_top(int y) {
57  bounding_box_.set_top(y);
58  }
59 
60  void set_bottom(int y) {
61  bounding_box_.set_bottom(y);
62  }
63 
64  void set_left(int x) {
65  bounding_box_.set_left(x);
66  }
67 
68  void set_right(int x) {
69  bounding_box_.set_right(x);
70  }
71 
    // Replaces the whole bounding box with a copy of other.
72  void set_bounding_box(const TBOX& other) {
73  bounding_box_ = other;
74  }
75 
    // Returns the stored count of table colpartitions.
76  int get_num_table_cells() const {
77  return num_table_cells_;
78  }
79 
80  // Set the number of table colpartitions covered by the bounding_box_.
81  void set_num_table_cells(int n) {
82  num_table_cells_ = n;
83  }
84 
    // Returns the stored count of text colpartitions.
85  int get_num_text_cells() const {
86  return num_text_cells_;
87  }
88 
89  // Set the number of text colpartitions covered by the bounding_box_.
90  void set_num_text_cells(int n) {
91  num_text_cells_ = n;
92  }
93 
    // Returns the stored segment type (type_).
94  ColSegType type() const {
95  return type_;
96  }
97 
98  // Set the type of the block based on the ratio of table to text
99  // colpartitions covered by it. Only declared here; the definition (and the
     // exact rule it applies) lives outside this header.
100  void set_type();
101 
102  // Provides a color for BBGrid to draw the rectangle.
103  ScrollView::Color BoxColor() const;
104 
105  // Insert a rectangle into bounding_box_.
106  void InsertBox(const TBOX& other);
107 
108  private:
109  TBOX bounding_box_; // bounding box
110  int num_table_cells_;  // count reported by get_num_table_cells()
111  int num_text_cells_;  // count reported by get_num_text_cells()
112  ColSegType type_;  // value reported by type()
113 };
114 
115 // Typedefs for a BBGrid of ColSegments and the matching GridSearch.
     // Both aliases use the same three template arguments: the element type
     // ColSegment, the list type ColSegment_CLIST and the iterator type
     // ColSegment_C_IT.
116 typedef BBGrid<ColSegment,
117  ColSegment_CLIST,
118  ColSegment_C_IT> ColSegmentGrid;
119 typedef GridSearch<ColSegment,
120  ColSegment_CLIST,
121  ColSegment_C_IT> ColSegmentGridSearch;
122 
123 // TableFinder is a utility class to find a set of tables given a set of
124 // ColPartitions and Columns. The TableFinder will mark candidate ColPartitions
125 // based on research in "Table Detection in Heterogeneous Documents".
126 // Usage flow is as follows:
127 // TableFinder finder;
128 // finder.InsertCleanPartitions(/* grid info */)
129 // finder.LocateTables(/* ColPartitions and Columns */);
130 // finder.Update TODO(nbeato)
131 class TableFinder {
132  public:
133  // Constructor is simple initializations
134  TableFinder();
135  ~TableFinder();
136 
137  // Set the resolution of the connected components in ppi.
138  void set_resolution(int resolution) {
139  resolution_ = resolution;
140  }
141  // Change the reading order. Initially it is left to right.
142  void set_left_to_right_language(bool order);
143 
144  // Initialize
145  void Init(int grid_size, const ICOORD& bottom_left, const ICOORD& top_right);
146 
147  // Copy cleaned partitions from ColumnFinder's part_grid_ to this
148  // clean_part_grid_ and insert dot-like noise into period_grid_.
149  // It resizes the grids in this object to the dimensions of grid.
150  void InsertCleanPartitions(ColPartitionGrid* grid, TO_BLOCK* block);
151 
152  // High level function to perform table detection
153  // Finds tables and updates the grid object with new partitions for the
154  // tables. The columns and width callbacks are used to merge tables.
155  // The reskew argument is only used to write the tables to the out.png
156  // if that feature is enabled.
157  void LocateTables(ColPartitionGrid* grid,
158  ColPartitionSet** columns,
159  WidthCallback* width_cb,
160  const FCOORD& reskew);
161 
162  protected:
163  // Access for the grid dimensions.
164  // The results will not be correct until InsertCleanPartitions
165  // has been called. The values are taken from the grid passed as an argument
166  // to that function.
167  int gridsize() const;
168  int gridwidth() const;
169  int gridheight() const;
170  const ICOORD& bleft() const;
171  const ICOORD& tright() const;
172 
173  // Makes a window for debugging, see BBGrid
174  ScrollView* MakeWindow(int x, int y, const char* window_name);
175 
178  // Inserts text into the table finder.
179  void InsertTextPartition(ColPartition* part);
185  bool AllowTextPartition(const ColPartition& part) const;
186  bool AllowBlob(const BLOBNBOX& blob) const;
187 
191 
192  // Utility function to move segments to col_seg_grid
193  // Note: Move includes ownership,
194  // so segments will be owned by col_seg_grid
195  void MoveColSegmentsToGrid(ColSegment_LIST* segments,
196  ColSegmentGrid* col_seg_grid);
197 
201 
202  // Initialize the grid and partitions
203  void InitializePartitions(ColPartitionSet** all_columns);
204 
205  // Set left, right and top, bottom spacings of each colpartition.
206  // Left/right spacings are w.r.t the column boundaries
207  // Top/bottom spacings are w.r.t. previous and next colpartitions
208  static void SetPartitionSpacings(ColPartitionGrid* grid,
209  ColPartitionSet** all_columns);
210 
211  // Set spacing and closest neighbors above and below a given colpartition.
212  void SetVerticalSpacing(ColPartition* part);
213 
214  // Set global spacing estimates. This function is dependent on the
215  // partition spacings. So make sure SetPartitionSpacings is called
216  // on the same grid before this.
218  // Access to the global median xheight. The xheight is the height
219  // of a lowercase 'x' character on the page. This can be viewed as the
220  // average height of a lowercase letter in a textline. As a result
221  // it is used to make assumptions about spacing between words and
222  // table cells.
223  void set_global_median_xheight(int xheight);
224  // Access to the global median blob width. The width is useful
225  // when deciding if a partition is noise.
226  void set_global_median_blob_width(int width);
227  // Access to the global median leading. The leading is the distance between
228  // two adjacent text lines. This value can be used to get a rough estimate
229  // for the amount of space between two lines of text. As a result, it
230  // is used to calculate appropriate spacing between adjacent rows of text.
231  void set_global_median_ledding(int ledding);
232 
233  // Updates the nearest neighbors for each ColPartition in clean_part_grid_.
234  // The neighbors are most likely SingletonPartner calls after the neighbors
235  // are assigned. This is here until it is decided to remove the
236  // nearest_neighbor code in ColPartition
237  void FindNeighbors();
238 
243 
244  // High level function to mark partitions as table rows/cells.
245  // When this function is done, the column partitions in clean_part_grid_
246  // should mostly be marked as tables.
247  void MarkTablePartitions();
248  // Marks partitions given a local view of a single partition
251  // Check if the partition has at least one large gap between words or no
252  // significant gap at all
253  // TODO(nbeato): Make const, prevented because blobnbox array access
254  bool HasWideOrNoInterWordGap(ColPartition* part) const;
255  // Checks if a partition is adjacent to leaders on the page
256  bool HasLeaderAdjacent(const ColPartition& part);
257  // Filter individual text partitions marked as table partitions
258  // consisting of paragraph endings, small section headings, and
259  // headers and footers.
260  void FilterFalseAlarms();
261  void FilterParagraphEndings();
262  void FilterHeaderAndFooter();
263  // Mark all ColPartitions as table cells that have a table cell above
264  // and below them
266 
279 
280  // Get Column segments from best_columns_
281  void GetColumnBlocks(ColPartitionSet** columns,
282  ColSegment_LIST *col_segments);
283 
284  // Group Column segments into consecutive single column regions.
285  void GroupColumnBlocks(ColSegment_LIST *current_segments,
286  ColSegment_LIST *col_segments);
287 
288  // Check if two boxes are consecutive within the same column
289  bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2);
290 
291  // Set the ratio of candidate table partitions in each column
292  void SetColumnsType(ColSegment_LIST* col_segments);
293 
294  // Merge Column Blocks that were split due to the presence of a table
295  void GridMergeColumnBlocks();
296 
302 
303  // Merge partition cells into table columns
304  // Differs from paper by just looking at marked table partitions
305  // instead of similarity metric.
306  // Modified section 4.1 of paper.
307  void GetTableColumns(ColSegment_LIST *table_columns);
308 
309  // Finds regions within a column that potentially contain a table.
310  // Ie, the table columns from GetTableColumns are turned into boxes
311  // that span the entire page column (using ColumnBlocks found in
312  // earlier functions) in the x direction and the min/max extent of
313  // overlapping table columns in the y direction.
314  // Section 4.2 of paper.
315  void GetTableRegions(ColSegment_LIST *table_columns,
316  ColSegment_LIST *table_regions);
317 
318 
321 
322  // Merge table regions corresponding to tables spanning multiple columns
323  void GridMergeTableRegions();
324  bool BelongToOneTable(const TBOX &box1, const TBOX &box2);
325 
326  // Adjust table boundaries by building a tight bounding box around all
327  // ColPartitions contained in it.
328  void AdjustTableBoundaries();
329 
330  // Grows a table to include partitions that are partially covered
331  // by the table. This includes lines and text. It does not include
332  // noise or images.
333  // On entry, result_box is the minimum size of the result. The results of the
334  // function will union the actual result with result_box.
335  void GrowTableBox(const TBOX& table_box, TBOX* result_box);
336  // Grow a table by increasing the size of the box to include
337  // partitions with significant overlap with the table.
338  void GrowTableToIncludePartials(const TBOX& table_box,
339  const TBOX& search_range,
340  TBOX* result_box);
341  // Grow a table by expanding to the extents of significantly
342  // overlapping lines.
343  void GrowTableToIncludeLines(const TBOX& table_box, const TBOX& search_range,
344  TBOX* result_box);
345  // Checks whether the horizontal line belongs to the table by looking at the
346  // side spacing of extra ColPartitions that will be included in the table
347  // due to expansion
348  bool HLineBelongsToTable(const ColPartition& part, const TBOX& table_box);
349 
350  // Look for isolated column headers above the given table box and
351  // include them in the table
352  void IncludeLeftOutColumnHeaders(TBOX* table_box);
353 
354  // Remove false alarms consisting of a single column
356 
357  // Return true if at least one gap larger than the global x-height
358  // exists in the horizontal projection
359  bool GapInXProjection(int* xprojection, int length);
360 
363  // This function will run the table recognizer and try to find better
364  // bounding boxes. The structures of the tables never leave this function
365  // right now. It just tries to prune and merge tables based on info it
366  // has available.
367  void RecognizeTables();
368 
372 
373  // Displays Colpartitions marked as table row. Overlays them on top of
374  // part_grid_.
375  void DisplayColSegments(ScrollView* win, ColSegment_LIST *cols,
376  ScrollView::Color color);
377 
378  // Displays the colpartitions using a new coloring on an existing window.
379  // Note: This method is only for debug purpose during development and
380  // would not be part of checked in code
382  ScrollView::Color text_color,
383  ScrollView::Color table_color);
385  ScrollView::Color default_color);
387  ColPartitionGrid* grid,
388  ScrollView::Color default_color);
390  ScrollView::Color color);
391 
392  // Write ColPartitions and Tables to a PIX image
393  // Note: This method is only for debug purpose during development and
394  // would not be part of checked in code
395  void WriteToPix(const FCOORD& reskew);
396 
397  // Merge all colpartitions in table regions to make them a single
398  // colpartition and revert types of isolated table cells not
399  // assigned to any table to their original types.
401  ColPartitionSet** columns,
402  WidthCallback* width_cb);
403 
405  // Useful objects used during table find process.
407  // Resolution of the connected components in ppi.
409  // Estimate of median x-height over the page
411  // Estimate of the median blob width on the page
413  // Estimate of median leading on the page
415  // Grid to hold cleaned colpartitions after removing all
416  // colpartitions that consist of only noise blobs, and removing
417  // noise blobs from remaining colpartitions.
419  // Grid contains the leaders and ruling lines.
421  // Grid contains the broken down column partitions. It can be thought
422  // of as a "word" grid. However, it usually doesn't break apart text lines.
423  // It does break apart table data (most of the time).
425  // Grid of page column blocks
427  // Grid of detected tables
429  // The reading order of text. Defaults to true, for languages such as English.
431 };
432 
433 } // namespace tesseract.
434 
435 #endif // TESSERACT_TEXTORD_TABLEFIND_H__
void GrowTableBox(const TBOX &table_box, TBOX *result_box)
Definition: tablefind.cpp:1528
ColSegmentGrid col_seg_grid_
Definition: tablefind.h:426
bool AllowBlob(const BLOBNBOX &blob) const
Definition: tablefind.cpp:510
void InsertLeaderPartition(ColPartition *part)
Definition: tablefind.cpp:418
void set_global_median_ledding(int ledding)
Definition: tablefind.cpp:770
void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
Definition: tablefind.cpp:1184
#define CLISTIZEH(CLASSNAME)
Definition: clst.h:901
void MarkPartitionsUsingLocalInformation()
Definition: tablefind.cpp:835
void InsertRulingPartition(ColPartition *part)
Definition: tablefind.cpp:426
void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments)
Definition: tablefind.cpp:531
void set_top(int y)
Definition: tablefind.h:56
integer coordinate
Definition: points.h:30
int get_num_text_cells() const
Definition: tablefind.h:85
const TBOX & bounding_box() const
Definition: tablefind.h:52
ColPartitionGrid clean_part_grid_
Definition: tablefind.h:418
void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
Definition: tablefind.cpp:1331
void set_num_table_cells(int n)
Definition: tablefind.h:81
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb)
Definition: tablefind.cpp:2079
bool AllowTextPartition(const ColPartition &part) const
Definition: tablefind.cpp:497
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb, const FCOORD &reskew)
Definition: tablefind.cpp:264
void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
Definition: tablefind.cpp:546
int get_num_table_cells() const
Definition: tablefind.h:76
ColSegmentGrid table_grid_
Definition: tablefind.h:428
ColSegType type() const
Definition: tablefind.h:94
void Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
Definition: tablefind.cpp:186
void GetTableColumns(ColSegment_LIST *table_columns)
Definition: tablefind.cpp:1281
void set_global_median_blob_width(int width)
Definition: tablefind.cpp:767
bool BelongToOneTable(const TBOX &box1, const TBOX &box2)
Definition: tablefind.cpp:1452
void set_right(int x)
Definition: tablefind.h:68
void InsertImagePartition(ColPartition *part)
Definition: tablefind.cpp:429
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
Definition: tablefind.cpp:1924
void set_left_to_right_language(bool order)
Definition: tablefind.cpp:182
bool HasLeaderAdjacent(const ColPartition &part)
Definition: tablefind.cpp:954
void DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
Definition: tablefind.cpp:1958
bool HasWideOrNoInterWordGap(ColPartition *part) const
Definition: tablefind.cpp:865
void set_num_text_cells(int n)
Definition: tablefind.h:90
bool HLineBelongsToTable(const ColPartition &part, const TBOX &table_box)
Definition: tablefind.cpp:1608
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:424
int gridheight() const
Definition: tablefind.cpp:392
ColPartitionGrid leader_and_ruling_grid_
Definition: tablefind.h:420
void InsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:410
const ICOORD & bleft() const
Definition: tablefind.cpp:395
void InitializePartitions(ColPartitionSet **all_columns)
Definition: tablefind.cpp:587
void set_bounding_box(const TBOX &other)
Definition: tablefind.h:72
void set_left(int x)
Definition: tablefind.h:64
void DisplayColSegmentGrid(ScrollView *win, ColSegmentGrid *grid, ScrollView::Color color)
Definition: tablefind.cpp:1899
Definition: points.h:189
void set_global_median_xheight(int xheight)
Definition: tablefind.cpp:764
void SetGlobalSpacings(ColPartitionGrid *grid)
Definition: tablefind.cpp:717
void set_resolution(int resolution)
Definition: tablefind.h:138
void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
Definition: tablefind.cpp:1550
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
Definition: tablefind.cpp:1578
ELISTIZEH(AmbigSpec)
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2)
Definition: tablefind.cpp:576
void SetVerticalSpacing(ColPartition *part)
Definition: tablefind.cpp:674
void InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
Definition: tablefind.cpp:198
void InsertTextPartition(ColPartition *part)
Definition: tablefind.cpp:402
const ICOORD & tright() const
Definition: tablefind.cpp:398
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: tablefind.cpp:526
Definition: rect.h:30
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:444
bool GapInXProjection(int *xprojection, int length)
Definition: tablefind.cpp:1776
GridSearch< ColSegment, ColSegment_CLIST, ColSegment_C_IT > ColSegmentGridSearch
Definition: tablefind.h:121
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
Definition: tablefind.cpp:1879
void SetColumnsType(ColSegment_LIST *col_segments)
Definition: tablefind.cpp:1151
BBGrid< ColSegment, ColSegment_CLIST, ColSegment_C_IT > ColSegmentGrid
Definition: tablefind.h:118
void IncludeLeftOutColumnHeaders(TBOX *table_box)
Definition: tablefind.cpp:1672
void WriteToPix(const FCOORD &reskew)
Definition: tablefind.cpp:2006
void set_bottom(int y)
Definition: tablefind.h:60
static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns)
Definition: tablefind.cpp:594