tesseract  3.05.02
tablefind.cpp
Go to the documentation of this file.
1 // File: tablefind.cpp
3 // Description: Helper classes to find tables from ColPartitions.
4 // Author: Faisal Shafait (faisal.shafait@dfki.de)
5 // Created: Tue Jan 06 11:13:01 PST 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifdef _MSC_VER
21 #pragma warning(disable:4244) // Conversion warnings
22 #endif
23 
24 #ifdef HAVE_CONFIG_H
25 #include "config_auto.h"
26 #endif
27 
28 #include "tablefind.h"
29 #include <math.h>
30 
31 #include "allheaders.h"
32 
33 #include "colpartitionset.h"
34 #include "tablerecog.h"
35 
36 namespace tesseract {
37 
38 // These numbers are used to calculate the global median stats.
39 // They just set an upper bound on the stats objects.
40 // Maximum vertical spacing between neighbor partitions.
41 const int kMaxVerticalSpacing = 500;
42 // Maximum width of a blob in a partition.
43 const int kMaxBlobWidth = 500;
44 
45 // Minimum whitespace size to split a partition (measured as a multiple
46 // of a partition's median width).
47 const double kSplitPartitionSize = 2.0;
48 // To insert text, the partition must satisfy these size constraints
49 // in AllowTextPartition(). The idea is to filter noise partitions
50 // determined by the size compared to the global medians.
51 // TODO(nbeato): Need to find good numbers again.
52 const double kAllowTextHeight = 0.5;
53 const double kAllowTextWidth = 0.6;
54 const double kAllowTextArea = 0.8;
55 // The same thing applies to blobs (to filter noise).
56 // TODO(nbeato): These numbers are a shot in the dark...
57 // height and width are 0.5 * gridsize() in colfind.cpp
58 // area is a rough guess for the size of a period.
59 const double kAllowBlobHeight = 0.3;
60 const double kAllowBlobWidth = 0.4;
61 const double kAllowBlobArea = 0.05;
62 
63 // Minimum number of components in a text partition. A partition having fewer
64 // components than that is more likely a data partition and is a candidate
65 // table cell.
66 const int kMinBoxesInTextPartition = 10;
67 
68 // Maximum number of components that a data partition can have
69 const int kMaxBoxesInDataPartition = 20;
70 
71 // Maximum allowed gap in a text partitions as a multiple of its median size.
72 const double kMaxGapInTextPartition = 4.0;
73 
74 // Minimum value that the maximum gap in a text partition should have as a
75 // factor of its median size.
76 const double kMinMaxGapInTextPartition = 0.5;
77 
78 // The amount of overlap that is "normal" for adjacent blobs in a text
79 // partition. This is used to calculate gap between overlapping blobs.
80 const double kMaxBlobOverlapFactor = 4.0;
81 
82 // Maximum x-height a table partition can have as a multiple of global
83 // median x-height
84 const double kMaxTableCellXheight = 2.0;
85 
86 // Maximum line spacing between a table column header and column contents
87 // for merging the two (as a multiple of the partition's median_size).
89 
90 // Minimum ratio of num_table_partitions to num_text_partitions in a column
91 // block for it to be called a table column
92 const double kTableColumnThreshold = 3.0;
93 
94 // Search for horizontal ruling lines within the vertical margin as a
95 // multiple of grid size
96 const int kRulingVerticalMargin = 3;
97 
98 // Minimum overlap that a colpartition must have with a table region
99 // to become part of that table
100 const double kMinOverlapWithTable = 0.6;
101 
102 // Maximum side space (distance from column boundary) that a typical
103 // text-line in flowing text should have as a multiple of its x-height
104 // (Median size).
105 const int kSideSpaceMargin = 10;
106 
107 // Fraction of the peak of x-projection of a table region to set the
108 // threshold for the x-projection histogram
109 const double kSmallTableProjectionThreshold = 0.35;
110 const double kLargeTableProjectionThreshold = 0.45;
111 // Minimum number of rows required to look for more rows in the projection.
112 const int kLargeTableRowCount = 6;
113 
114 // Minimum number of rows in a table
115 const int kMinRowsInTable = 3;
116 
117 // The amount of padding (multiplied by global_median_xheight_ during use)
118 // that is vertically added to the search adjacent leader search during
119 // ColPartition marking.
121 
122 // Used when filtering false positives. When finding the last line
123 // of a paragraph (typically left-aligned), the previous line should have
124 // its center to the right of the last line by this scaled amount.
126 
127 // The maximum amount of whitespace allowed left of a paragraph ending.
128 // Do not filter a ColPartition with more than this space left of it.
130 
131 // Used when filtering false positives. The last line of a paragraph
132 // should be preceded by a line that is predominantly text. This is the
133 // ratio of text to whitespace (to the right of the text) that is required
134 // for the previous line to be a text.
136 
137 // When counting table columns, this is the required gap between two columns
138 // (it is multiplied by global_median_xheight_).
139 const double kMaxXProjectionGapFactor = 2.0;
140 
141 // Used for similarity in partitions using stroke width. Values copied
142 // from ColFind.cpp in Ray's CL.
144 const double kStrokeWidthConstantTolerance = 2.0;
145 
146 BOOL_VAR(textord_dump_table_images, false, "Paint table detection output");
147 BOOL_VAR(textord_show_tables, false, "Show table regions");
149  "Debug table marking steps in detail");
151  "Show page stats used in table finding");
153  "Enables the table recognizer for table layout and filtering.");
154 
157 
// Generic deleter: destroys the pointed-to object with `delete`.
// Its address is handed to BBGrid::ClearGridData() (for example as
// &DeleteObject<ColPartition>) so that a grid can free the elements it holds.
template <typename T>
void DeleteObject(T* object) {
  delete object;
}
163 
165  : resolution_(0),
166  global_median_xheight_(0),
167  global_median_blob_width_(0),
168  global_median_ledding_(0),
169  left_to_right_language_(true) {
170 }
171 
173  // ColPartitions and ColSegments created by this class for storage in grids
174  // need to be deleted explicitly.
175  clean_part_grid_.ClearGridData(&DeleteObject<ColPartition>);
176  leader_and_ruling_grid_.ClearGridData(&DeleteObject<ColPartition>);
177  fragmented_text_grid_.ClearGridData(&DeleteObject<ColPartition>);
178  col_seg_grid_.ClearGridData(&DeleteObject<ColSegment>);
179  table_grid_.ClearGridData(&DeleteObject<ColSegment>);
180 }
181 
183  left_to_right_language_ = order;
184 }
185 
186 void TableFinder::Init(int grid_size, const ICOORD& bottom_left,
187  const ICOORD& top_right) {
188  // Initialize clean partitions list and grid
189  clean_part_grid_.Init(grid_size, bottom_left, top_right);
190  leader_and_ruling_grid_.Init(grid_size, bottom_left, top_right);
191  fragmented_text_grid_.Init(grid_size, bottom_left, top_right);
192  col_seg_grid_.Init(grid_size, bottom_left, top_right);
193  table_grid_.Init(grid_size, bottom_left, top_right);
194 }
195 
196 // Copy cleaned partitions from part_grid_ to clean_part_grid_ and
197 // insert leaders and rulers into the leader_and_ruling_grid_
199  TO_BLOCK* block) {
200  // Calculate stats. This lets us filter partitions in AllowTextPartition()
201  // and filter blobs in AllowBlob().
202  SetGlobalSpacings(grid);
203 
204  // Iterate the ColPartitions in the grid.
205  ColPartitionGridSearch gsearch(grid);
206  gsearch.SetUniqueMode(true);
207  gsearch.StartFullSearch();
208  ColPartition* part = NULL;
209  while ((part = gsearch.NextFullSearch()) != NULL) {
210  // Reject partitions with nothing useful inside of them.
211  if (part->blob_type() == BRT_NOISE || part->bounding_box().area() <= 0)
212  continue;
213  ColPartition* clean_part = part->ShallowCopy();
214  ColPartition* leader_part = NULL;
215  if (part->IsLineType()) {
216  InsertRulingPartition(clean_part);
217  continue;
218  }
219  // Insert all non-text partitions to clean_parts
220  if (!part->IsTextType()) {
221  InsertImagePartition(clean_part);
222  continue;
223  }
224  // Insert text colpartitions after removing noisy components from them
225  // The leaders are split into a separate grid.
226  BLOBNBOX_CLIST* part_boxes = part->boxes();
227  BLOBNBOX_C_IT pit(part_boxes);
228  for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
229  BLOBNBOX *pblob = pit.data();
230  // Bad blobs... happens in UNLV set.
231  // news.3G1, page 17 (around x=6)
232  if (!AllowBlob(*pblob))
233  continue;
234  if (pblob->flow() == BTFT_LEADER) {
235  if (leader_part == NULL) {
236  leader_part = part->ShallowCopy();
237  leader_part->set_flow(BTFT_LEADER);
238  }
239  leader_part->AddBox(pblob);
240  } else if (pblob->region_type() != BRT_NOISE) {
241  clean_part->AddBox(pblob);
242  }
243  }
244  clean_part->ComputeLimits();
245  ColPartition* fragmented = clean_part->CopyButDontOwnBlobs();
246  InsertTextPartition(clean_part);
248  if (leader_part != NULL) {
249  // TODO(nbeato): Note that ComputeLimits does not update the column
250  // information. So the leader may appear to span more columns than it
251  // really does later on when IsInSameColumnAs gets called to test
252  // for adjacent leaders.
253  leader_part->ComputeLimits();
254  InsertLeaderPartition(leader_part);
255  }
256  }
257 
258  // Make the partition partners better for upper and lower neighbors.
261 }
262 
263 // High level function to perform table detection
265  ColPartitionSet** all_columns,
266  WidthCallback* width_cb,
267  const FCOORD& reskew) {
268  // initialize spacing, neighbors, and columns
269  InitializePartitions(all_columns);
270 
271 #ifndef GRAPHICS_DISABLED
272  if (textord_show_tables) {
273  ScrollView* table_win = MakeWindow(0, 300, "Column Partitions & Neighbors");
279 
280  table_win = MakeWindow(100, 300, "Fragmented Text");
282  }
283 #endif // GRAPHICS_DISABLED
284 
285  // mark, filter, and smooth candidate table partitions
287 
288  // Make single-column blocks from good_columns_ partitions. col_segments are
289  // moved to a grid later which takes the ownership
290  ColSegment_LIST column_blocks;
291  GetColumnBlocks(all_columns, &column_blocks);
292  // Set the ratio of candidate table partitions in each column
293  SetColumnsType(&column_blocks);
294 
295  // Move column segments to col_seg_grid_
296  MoveColSegmentsToGrid(&column_blocks, &col_seg_grid_);
297 
298  // Detect split in column layout that might have occurred due to the
299  // presence of a table. In such a case, merge the corresponding columns.
301 
302  // Group horizontally overlapping table partitions into table columns.
303  // table_columns created here get deleted at the end of this method.
304  ColSegment_LIST table_columns;
305  GetTableColumns(&table_columns);
306 
307  // Within each column, mark the range table regions occupy based on the
308  // table columns detected. table_regions are moved to a grid later which
309  // takes the ownership
310  ColSegment_LIST table_regions;
311  GetTableRegions(&table_columns, &table_regions);
312 
313 #ifndef GRAPHICS_DISABLED
315  ScrollView* table_win = MakeWindow(1200, 300, "Table Columns and Regions");
316  DisplayColSegments(table_win, &table_columns, ScrollView::DARK_TURQUOISE);
317  DisplayColSegments(table_win, &table_regions, ScrollView::YELLOW);
318  }
319 #endif // GRAPHICS_DISABLED
320 
321  // Merge table regions across columns for tables spanning multiple
322  // columns
323  MoveColSegmentsToGrid(&table_regions, &table_grid_);
325 
326  // Adjust table boundaries by including nearby horizontal lines and left
327  // out column headers
330 
332  // Remove false alarms consisting of a single column
334 
335 #ifndef GRAPHICS_DISABLED
336  if (textord_show_tables) {
337  ScrollView* table_win = MakeWindow(1200, 300, "Detected Table Locations");
339  DisplayColSegments(table_win, &table_columns, ScrollView::KHAKI);
340  table_grid_.DisplayBoxes(table_win);
341  }
342 #endif // GRAPHICS_DISABLED
343 
344  // Find table grid structure and reject tables that are malformed.
345  RecognizeTables();
347  RecognizeTables();
348 
349 #ifndef GRAPHICS_DISABLED
350  if (textord_show_tables) {
351  ScrollView* table_win = MakeWindow(1400, 600, "Recognized Tables");
354  table_grid_.DisplayBoxes(table_win);
355  }
356 #endif // GRAPHICS_DISABLED
357  } else {
358  // Remove false alarms consisting of a single column
359  // TODO(nbeato): verify this is a NOP after structured table rejection.
360  // Right now it isn't. If the recognize function is doing what it is
361  // supposed to do, this function is obsolete.
363 
364 #ifndef GRAPHICS_DISABLED
365  if (textord_show_tables) {
366  ScrollView* table_win = MakeWindow(1500, 300, "Detected Tables");
369  table_grid_.DisplayBoxes(table_win);
370  }
371 #endif // GRAPHICS_DISABLED
372  }
373 
375  WriteToPix(reskew);
376 
377  // Merge all colpartitions in table regions to make them a single
378  // colpartition and revert types of isolated table cells not
379  // assigned to any table to their original types.
380  MakeTableBlocks(grid, all_columns, width_cb);
381 }
382 // All grids have the same dimensions. The clean_part_grid_ sizes are set from
383 // the part_grid_ that is passed to InsertCleanPartitions, which was the same as
384 // the grid that is the base of ColumnFinder. Just return the clean_part_grid_
385 // dimensions instead of duplicated memory.
387  return clean_part_grid_.gridsize();
388 }
390  return clean_part_grid_.gridwidth();
391 }
393  return clean_part_grid_.gridheight();
394 }
395 const ICOORD& TableFinder::bleft() const {
396  return clean_part_grid_.bleft();
397 }
398 const ICOORD& TableFinder::tright() const {
399  return clean_part_grid_.tright();
400 }
401 
403  ASSERT_HOST(part != NULL);
404  if (AllowTextPartition(*part)) {
405  clean_part_grid_.InsertBBox(true, true, part);
406  } else {
407  delete part;
408  }
409 }
411  ASSERT_HOST(part != NULL);
412  if (AllowTextPartition(*part)) {
413  fragmented_text_grid_.InsertBBox(true, true, part);
414  } else {
415  delete part;
416  }
417 }
419  ASSERT_HOST(part != NULL);
420  if (!part->IsEmpty() && part->bounding_box().area() > 0) {
421  leader_and_ruling_grid_.InsertBBox(true, true, part);
422  } else {
423  delete part;
424  }
425 }
427  leader_and_ruling_grid_.InsertBBox(true, true, part);
428 }
430  // NOTE: If images are placed into a different grid in the future,
431  // the function SetPartitionSpacings needs to be updated. It should
432  // be the only thing that cares about image partitions.
433  clean_part_grid_.InsertBBox(true, true, part);
434 }
435 
436 // Splits a partition into its "words". The splits happen
437 // at locations with wide inter-blob spacing. This is useful
438 // because it allows the table recognizer to "cut through" the
439 // text lines on the page. The assumption is that a table
440 // will have several lines with similar overlapping whitespace
441 // whereas text will not have this type of property.
442 // Note: The code Assumes that blobs are sorted by the left side x!
443 // This will not work (as well) if the blobs are sorted by center/right.
445  ASSERT_HOST(part != NULL);
446  // Bye bye empty partitions!
447  if (part->boxes()->empty()) {
448  delete part;
449  return;
450  }
451 
452  // The AllowBlob function prevents this.
453  ASSERT_HOST(part->median_width() > 0);
454  const double kThreshold = part->median_width() * kSplitPartitionSize;
455 
456  ColPartition* right_part = part;
457  bool found_split = true;
458  while (found_split) {
459  found_split = false;
460  BLOBNBOX_C_IT box_it(right_part->boxes());
461  // Blobs are sorted left side first. If blobs overlap,
462  // the previous blob may have a "more right" right side.
463  // Account for this by always keeping the largest "right"
464  // so far.
465  int previous_right = MIN_INT32;
466 
467  // Look for the next split in the partition.
468  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
469  const TBOX& box = box_it.data()->bounding_box();
470  if (previous_right != MIN_INT32 &&
471  box.left() - previous_right > kThreshold) {
472  // We have a split position. Split the partition in two pieces.
473  // Insert the left piece in the grid and keep processing the right.
474  int mid_x = (box.left() + previous_right) / 2;
475  ColPartition* left_part = right_part;
476  right_part = left_part->SplitAt(mid_x);
477 
479  found_split = true;
480  break;
481  }
482 
483  // The right side of the previous blobs.
484  previous_right = MAX(previous_right, box.right());
485  }
486  }
487  // When a split is not found, the right part is minimized
488  // as much as possible, so process it.
489  InsertFragmentedTextPartition(right_part);
490 }
491 
492 // Some simple criteria to filter out now. We want to make sure the
493 // average blob size in the partition is consistent with the
494 // global page stats.
495 // The area metric will almost always pass for multi-blob partitions.
496 // It is useful when filtering out noise caused by an isolated blob.
498  const double kHeightRequired = global_median_xheight_ * kAllowTextHeight;
499  const double kWidthRequired = global_median_blob_width_ * kAllowTextWidth;
500  const int median_area = global_median_xheight_ * global_median_blob_width_;
501  const double kAreaPerBlobRequired = median_area * kAllowTextArea;
502  // Keep comparisons strictly greater to disallow 0!
503  return part.median_size() > kHeightRequired &&
504  part.median_width() > kWidthRequired &&
505  part.bounding_box().area() > kAreaPerBlobRequired * part.boxes_count();
506 }
507 
508 // Same as above, applied to blobs. Keep in mind that
509 // leaders, commas, and periods are important in tables.
510 bool TableFinder::AllowBlob(const BLOBNBOX& blob) const {
511  const TBOX& box = blob.bounding_box();
512  const double kHeightRequired = global_median_xheight_ * kAllowBlobHeight;
513  const double kWidthRequired = global_median_blob_width_ * kAllowBlobWidth;
514  const int median_area = global_median_xheight_ * global_median_blob_width_;
515  const double kAreaRequired = median_area * kAllowBlobArea;
516  // Keep comparisons strictly greater to disallow 0!
517  return box.height() > kHeightRequired &&
518  box.width() > kWidthRequired &&
519  box.area() > kAreaRequired;
520 }
521 
522 // TODO(nbeato): The grid that makes the window doesn't seem to matter.
523 // The only downside is that window messages will be caught by
524 // clean_part_grid_ instead of a useful object. This is a temporary solution
525 // for the debug windows created by the TableFinder.
526 ScrollView* TableFinder::MakeWindow(int x, int y, const char* window_name) {
527  return clean_part_grid_.MakeWindow(x, y, window_name);
528 }
529 
530 // Make single-column blocks from good_columns_ partitions.
532  ColSegment_LIST* column_blocks) {
533  for (int i = 0; i < gridheight(); ++i) {
534  ColPartitionSet* columns = all_columns[i];
535  if (columns != NULL) {
536  ColSegment_LIST new_blocks;
537  // Get boxes from the current vertical position on the grid
538  columns->GetColumnBoxes(i * gridsize(), (i+1) * gridsize(), &new_blocks);
539  // Merge the new_blocks boxes into column_blocks if they are well-aligned
540  GroupColumnBlocks(&new_blocks, column_blocks);
541  }
542  }
543 }
544 
545 // Merge column segments into the current list if they are well aligned.
546 void TableFinder::GroupColumnBlocks(ColSegment_LIST* new_blocks,
547  ColSegment_LIST* column_blocks) {
548  ColSegment_IT src_it(new_blocks);
549  ColSegment_IT dest_it(column_blocks);
550  // iterate through the source list
551  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
552  ColSegment* src_seg = src_it.data();
553  const TBOX& src_box = src_seg->bounding_box();
554  bool match_found = false;
555  // iterate through the destination list to find a matching column block
556  for (dest_it.mark_cycle_pt(); !dest_it.cycled_list(); dest_it.forward()) {
557  ColSegment* dest_seg = dest_it.data();
558  TBOX dest_box = dest_seg->bounding_box();
559  if (ConsecutiveBoxes(src_box, dest_box)) {
560  // If matching block is found, insert the current block into it
561  // and delete the soure block
562  dest_seg->InsertBox(src_box);
563  match_found = true;
564  delete src_it.extract();
565  break;
566  }
567  }
568  // If no match is found, just append the source block to column_blocks
569  if (!match_found) {
570  dest_it.add_after_then_move(src_it.extract());
571  }
572  }
573 }
574 
575 // are the two boxes immediate neighbors along the vertical direction
576 bool TableFinder::ConsecutiveBoxes(const TBOX &b1, const TBOX &b2) {
577  int x_margin = 20;
578  int y_margin = 5;
579  return (abs(b1.left() - b2.left()) < x_margin) &&
580  (abs(b1.right() - b2.right()) < x_margin) &&
581  (abs(b1.top()-b2.bottom()) < y_margin ||
582  abs(b2.top()-b1.bottom()) < y_margin);
583 }
584 
585 // Set up info for clean_part_grid_ partitions to be valid during detection
586 // code.
588  FindNeighbors();
589  SetPartitionSpacings(&clean_part_grid_, all_columns);
591 }
592 
593 // Set left, right and top, bottom spacings of each colpartition.
595  ColPartitionSet** all_columns) {
596  // Iterate the ColPartitions in the grid.
597  ColPartitionGridSearch gsearch(grid);
598  gsearch.StartFullSearch();
599  ColPartition* part = NULL;
600  while ((part = gsearch.NextFullSearch()) != NULL) {
601  ColPartitionSet* columns = all_columns[gsearch.GridY()];
602  TBOX box = part->bounding_box();
603  int y = part->MidY();
604  ColPartition* left_column = columns->ColumnContaining(box.left(), y);
605  ColPartition* right_column = columns->ColumnContaining(box.right(), y);
606  // set distance from left column as space to the left
607  if (left_column) {
608  int left_space = MAX(0, box.left() - left_column->LeftAtY(y));
609  part->set_space_to_left(left_space);
610  }
611  // set distance from right column as space to the right
612  if (right_column) {
613  int right_space = MAX(0, right_column->RightAtY(y) - box.right());
614  part->set_space_to_right(right_space);
615  }
616 
617  // Look for images that may be closer.
618  // NOTE: used to be part_grid_, might cause issues now
619  ColPartitionGridSearch hsearch(grid);
620  hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
621  ColPartition* neighbor = NULL;
622  while ((neighbor = hsearch.NextSideSearch(true)) != NULL) {
623  if (neighbor->type() == PT_PULLOUT_IMAGE ||
624  neighbor->type() == PT_FLOWING_IMAGE ||
625  neighbor->type() == PT_HEADING_IMAGE) {
626  int right = neighbor->bounding_box().right();
627  if (right < box.left()) {
628  int space = MIN(box.left() - right, part->space_to_left());
629  part->set_space_to_left(space);
630  }
631  }
632  }
633  hsearch.StartSideSearch(box.left(), box.bottom(), box.top());
634  neighbor = NULL;
635  while ((neighbor = hsearch.NextSideSearch(false)) != NULL) {
636  if (neighbor->type() == PT_PULLOUT_IMAGE ||
637  neighbor->type() == PT_FLOWING_IMAGE ||
638  neighbor->type() == PT_HEADING_IMAGE) {
639  int left = neighbor->bounding_box().left();
640  if (left > box.right()) {
641  int space = MIN(left - box.right(), part->space_to_right());
642  part->set_space_to_right(space);
643  }
644  }
645  }
646 
647  ColPartition* upper_part = part->SingletonPartner(true);
648  if (upper_part) {
649  int space = MAX(0, upper_part->bounding_box().bottom() -
650  part->bounding_box().bottom());
651  part->set_space_above(space);
652  } else {
653  // TODO(nbeato): What constitutes a good value?
654  // 0 is the default value when not set, explicitly noting it needs to
655  // be something else.
656  part->set_space_above(MAX_INT32);
657  }
658 
659  ColPartition* lower_part = part->SingletonPartner(false);
660  if (lower_part) {
661  int space = MAX(0, part->bounding_box().bottom() -
662  lower_part->bounding_box().bottom());
663  part->set_space_below(space);
664  } else {
665  // TODO(nbeato): What constitutes a good value?
666  // 0 is the default value when not set, explicitly noting it needs to
667  // be something else.
668  part->set_space_below(MAX_INT32);
669  }
670  }
671 }
672 
673 // Set spacing and closest neighbors above and below a given colpartition.
675  TBOX box = part->bounding_box();
676  int top_range = MIN(box.top() + kMaxVerticalSpacing, tright().y());
677  int bottom_range = MAX(box.bottom() - kMaxVerticalSpacing, bleft().y());
678  box.set_top(top_range);
679  box.set_bottom(bottom_range);
680 
681  TBOX part_box = part->bounding_box();
682  // Start a rect search
684  rectsearch(&clean_part_grid_);
685  rectsearch.StartRectSearch(box);
686  ColPartition* neighbor;
687  int min_space_above = kMaxVerticalSpacing;
688  int min_space_below = kMaxVerticalSpacing;
689  ColPartition* above_neighbor = NULL;
690  ColPartition* below_neighbor = NULL;
691  while ((neighbor = rectsearch.NextRectSearch()) != NULL) {
692  if (neighbor == part)
693  continue;
694  TBOX neighbor_box = neighbor->bounding_box();
695  if (neighbor_box.major_x_overlap(part_box)) {
696  int gap = abs(part->median_bottom() - neighbor->median_bottom());
697  // If neighbor is below current partition
698  if (neighbor_box.top() < part_box.bottom() &&
699  gap < min_space_below) {
700  min_space_below = gap;
701  below_neighbor = neighbor;
702  } // If neighbor is above current partition
703  else if (part_box.top() < neighbor_box.bottom() &&
704  gap < min_space_above) {
705  min_space_above = gap;
706  above_neighbor = neighbor;
707  }
708  }
709  }
710  part->set_space_above(min_space_above);
711  part->set_space_below(min_space_below);
712  part->set_nearest_neighbor_above(above_neighbor);
713  part->set_nearest_neighbor_below(below_neighbor);
714 }
715 
716 // Set global spacing and x-height estimates
718  STATS xheight_stats(0, kMaxVerticalSpacing + 1);
719  STATS width_stats(0, kMaxBlobWidth + 1);
720  STATS ledding_stats(0, kMaxVerticalSpacing + 1);
721  // Iterate the ColPartitions in the grid.
722  ColPartitionGridSearch gsearch(grid);
723  gsearch.SetUniqueMode(true);
724  gsearch.StartFullSearch();
725  ColPartition* part = NULL;
726  while ((part = gsearch.NextFullSearch()) != NULL) {
727  // TODO(nbeato): HACK HACK HACK! medians are equal to partition length.
728  // ComputeLimits needs to get called somewhere outside of TableFinder
729  // to make sure the partitions are properly initialized.
730  // When this is called, SmoothPartitionPartners dies in an assert after
731  // table find runs. Alternative solution.
732  // part->ComputeLimits();
733  if (part->IsTextType()) {
734  // xheight_stats.add(part->median_size(), part->boxes_count());
735  // width_stats.add(part->median_width(), part->boxes_count());
736 
737  // This loop can be removed when above issues are fixed.
738  // Replace it with the 2 lines commented out above.
739  BLOBNBOX_C_IT it(part->boxes());
740  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
741  xheight_stats.add(it.data()->bounding_box().height(), 1);
742  width_stats.add(it.data()->bounding_box().width(), 1);
743  }
744 
745  ledding_stats.add(part->space_above(), 1);
746  ledding_stats.add(part->space_below(), 1);
747  }
748  }
749  // Set estimates based on median of statistics obtained
750  set_global_median_xheight(static_cast<int>(xheight_stats.median() + 0.5));
751  set_global_median_blob_width(static_cast<int>(width_stats.median() + 0.5));
752  set_global_median_ledding(static_cast<int>(ledding_stats.median() + 0.5));
753  #ifndef GRAPHICS_DISABLED
755  const char* kWindowName = "X-height (R), X-width (G), and ledding (B)";
756  ScrollView* stats_win = MakeWindow(500, 10, kWindowName);
757  xheight_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::RED);
758  width_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::GREEN);
759  ledding_stats.plot(stats_win, 10, 200, 2, 15, ScrollView::BLUE);
760  }
761  #endif // GRAPHICS_DISABLED
762 }
763 
765  global_median_xheight_ = xheight;
766 }
769 }
771  global_median_ledding_ = ledding;
772 }
773 
776  gsearch.StartFullSearch();
777  ColPartition* part = NULL;
778  while ((part = gsearch.NextFullSearch()) != NULL) {
779  // TODO(nbeato): Rename this function, meaning is different now.
780  // IT is finding nearest neighbors its own way
781  //SetVerticalSpacing(part);
782 
783  ColPartition* upper = part->SingletonPartner(true);
784  if (upper)
785  part->set_nearest_neighbor_above(upper);
786 
787  ColPartition* lower = part->SingletonPartner(false);
788  if (lower)
789  part->set_nearest_neighbor_below(lower);
790  }
791 }
792 
793 // High level interface. Input is an unmarked ColPartitionGrid
794 // (namely, clean_part_grid_). Partitions are identified using local
795 // information and filter/smoothed. The function exit should contain
796 // a good sampling of the table partitions.
800  ScrollView* table_win = MakeWindow(300, 300, "Initial Table Partitions");
804  }
807  ScrollView* table_win = MakeWindow(600, 300, "Filtered Table Partitions");
811  }
814  ScrollView* table_win = MakeWindow(900, 300, "Smoothed Table Partitions");
818  }
821  ScrollView* table_win = MakeWindow(900, 300, "Final Table Partitions");
825  }
826 }
827 
828 // These types of partitions are marked as table partitions:
829 // 1- Partitions that have at least one large gap between words
830 // 2- Partitions that consist of only one word (no significant gap
831 // between components)
832 // 3- Partitions that vertically overlap with other partitions within the
833 // same column.
834 // 4- Partitions with leaders before/after them.
836  // Iterate the ColPartitions in the grid.
838  gsearch(&clean_part_grid_);
839  gsearch.StartFullSearch();
840  ColPartition* part = NULL;
841  while ((part = gsearch.NextFullSearch()) != NULL) {
842  if (!part->IsTextType()) // Only consider text partitions
843  continue;
844  // Only consider partitions in dominant font size or smaller
846  continue;
847  // Mark partitions with a large gap, or no significant gap as
848  // table partitions.
849  // Comments: It produces several false alarms at:
850  // - last line of a paragraph (fixed)
851  // - single word section headings
852  // - page headers and footers
853  // - numbered equations
854  // - line drawing regions
855  // TODO(faisal): detect and fix above-mentioned cases
856  if (HasWideOrNoInterWordGap(part) ||
857  HasLeaderAdjacent(*part)) {
858  part->set_table_type();
859  }
860  }
861 }
862 
863 // Check if the partition has at least one large gap between words or no
864 // significant gap at all
// NOTE(review): the signature line (listing line 865) and the first operand
// of two size comparisons (listing lines 873 and 928) are missing from this
// extract. The body reads a partition pointer `part` and returns a bool.
//
// Returns true when the partition looks like a table cell: it is small, it
// contains a gap wider than max_gap, it has no measurable gap at all, or its
// widest gap is narrower than min_gap. Returns false for long partitions
// without a wide gap and for partitions whose widest gap lies in
// [min_gap, max_gap].
866  // Should only get text partitions.
867  ASSERT_HOST(part->IsTextType());
868  // Blob access
869  BLOBNBOX_CLIST* part_boxes = part->boxes();
870  BLOBNBOX_C_IT it(part_boxes);
871  // Check if this is a relatively small partition (such as a single word)
872  if (part->bounding_box().width() <
  // NOTE(review): right-hand side of the width comparison (listing line 873)
  // is missing here.
874  part_boxes->length() < kMinBoxesInTextPartition)
875  return true;
876 
877  // Variables used to compute inter-blob spacing.
878  int current_x0 = -1;
879  int current_x1 = -1;
880  int previous_x1 = -1;
881  // Stores the maximum gap detected.
882  int largest_partition_gap_found = -1;
883  // Text partition gap limits. If this is text (and not a table),
884  // there should be at least one gap larger than min_gap and no gap
885  // larger than max_gap.
886  const double max_gap = kMaxGapInTextPartition * part->median_size();
887  const double min_gap = kMinMaxGapInTextPartition * part->median_size();
888 
889  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
890  BLOBNBOX* blob = it.data();
891  current_x0 = blob->bounding_box().left();
892  current_x1 = blob->bounding_box().right();
  // previous_x1 stays -1 until the first blob has been processed, so the
  // first blob never produces a gap.
893  if (previous_x1 != -1) {
894  int gap = current_x0 - previous_x1;
895 
896  // TODO(nbeato): Boxes may overlap? Huh?
897  // For example, mag.3B 8003_033.3B.tif in UNLV data. The titles/authors
898  // on the top right of the page are filtered out with this line.
899  // Note 2: Iterating over blobs in a partition, so we are looking for
900  // spacing between the words.
901  if (gap < 0) {
902  // More likely case, the blobs slightly overlap. This can happen
903  // with diacritics (accents) or broken alphabet symbols (characters).
904  // Merge boxes together by taking max of right sides.
905  if (-gap < part->median_size() * kMaxBlobOverlapFactor) {
906  previous_x1 = MAX(previous_x1, current_x1);
  // previous_x1 already holds the merged right edge; `continue` skips the
  // assignment at the end of the loop body that would overwrite it.
907  continue;
908  }
909  // Extreme case, blobs overlap significantly in the same partition...
910  // This should not happen often (if at all), but it does.
911  // TODO(nbeato): investigate cases when this happens.
912  else {
913  // The behavior before was to completely ignore this case.
914  }
915  }
916 
917  // If a large enough gap is found, mark it as a table cell (return true)
918  if (gap > max_gap)
919  return true;
920  if (gap > largest_partition_gap_found)
921  largest_partition_gap_found = gap;
922  }
923  previous_x1 = current_x1;
924  }
925  // Since no large gap was found, return false if the partition is too
926  // long to be a data cell
927  if (part->bounding_box().width() >
  // NOTE(review): right-hand side of the width comparison (listing line 928)
  // is missing here.
929  part_boxes->length() > kMaxBoxesInDataPartition)
930  return false;
931 
932  // A partition may be a single blob. In this case, it's an isolated symbol
933  // or non-text (such as a ruling or image).
934  // Detect these as table partitions? Shouldn't this be case by case?
935  // The behavior before was to ignore this, making
  // largest_partition_gap_found < 0
936  // and implicitly return true. Just making it explicit.
937  if (largest_partition_gap_found == -1)
938  return true;
939 
940  // return true if the maximum gap found is smaller than the minimum allowed
941  // max_gap in a text partition. This indicates that there is no significant
942  // space in the partition, hence it is likely a single word.
943  return largest_partition_gap_found < min_gap;
944 }
945 
946 // One criterion for possible tables is that a table may have leaders
947 // between data cells. An aggressive solution to find such tables is to
948 // explicitly mark partitions that have adjacent leaders.
949 // Note that this includes overlapping leaders. However, it does not
950 // include leaders in different columns on the page.
951 // Possible false-positive will include lists, such as a table of contents.
952 // As these arise, the aggressive nature of this search may need to be
953 // trimmed down.
// NOTE(review): the signature line (listing line 954) and the declaration of
// `hsearch` (listing line 965) are missing from this extract. The body takes
// a partition `part` by reference and returns a bool.
//
// Returns true if `part` itself has flow BTFT_LEADER, or if a side search to
// the left or right finds a BTFT_LEADER partition in the same page column
// with a significant vertical core overlap; false otherwise.
955  if (part.flow() == BTFT_LEADER)
956  return true;
957  // Search range is left and right bounded by an offset of the
958  // median xheight. This offset is to allow some tolerance to the
959  // leaders on the page in the event that the alignment is still
960  // a bit off.
961  const TBOX& box = part.bounding_box();
962  const int search_size = kAdjacentLeaderSearchPadding * global_median_xheight_;
963  const int top = box.top() + search_size;
964  const int bottom = box.bottom() - search_size;
  // direction 0 starts at the right edge of the box and searches
  // right-to-left; direction 1 starts at the left edge and searches the
  // other way.
966  for (int direction = 0; direction < 2; ++direction) {
967  bool right_to_left = (direction == 0);
968  int x = right_to_left ? box.right() : box.left();
969  hsearch.StartSideSearch(x, bottom, top);
970  ColPartition* leader = NULL;
971  while ((leader = hsearch.NextSideSearch(right_to_left)) != NULL) {
972  // The leader could be a horizontal ruling in the grid.
973  // Make sure it is actually a leader.
974  if (leader->flow() != BTFT_LEADER)
975  continue;
976  // This should not happen, they are in different grids.
977  ASSERT_HOST(&part != leader);
978  // Make sure the leader shares a page column with the partition,
979  // otherwise we are spreading across columns.
  // Note this ends the search in the current direction (break, not continue).
980  if (!part.IsInSameColumnAs(*leader))
981  break;
982  // There should be a significant vertical overlap
983  if (!leader->VSignificantCoreOverlap(part))
984  continue;
985  // Leader passed all tests, so it is adjacent.
986  return true;
987  }
988  }
989  // No leaders are adjacent to the given partition.
990  return false;
991 }
992 
993 // Filter individual text partitions marked as table partitions
994 // consisting of paragraph endings, small section headings, and
995 // headers and footers.
// NOTE(review): listing lines 996-998 (the function signature and the
// statements preceding the TODO) are missing from this extract, so only the
// tail of the body is visible below.
999  // TODO(nbeato): Fully justified text as non-table?
1000 }
1001 
1001 
// NOTE(review): this extract is missing the function signature (listing line
// 1002), the declaration of `gsearch` (1005), the `if (...) {` that opens the
// left-to-right branch (1032), and the second operands of three comparisons
// (1052-1053, 1061, 1069). Comments describe only what is visible.
//
// Visible behaviour: for each PT_TABLE partition, a chain of tests compares it
// with its nearest neighbour above; any test that rejects the "last line of a
// paragraph" hypothesis skips the partition via `continue`. A partition that
// survives every test has its table mark cleared.
1003  // Detect last line of paragraph
1004  // Iterate the ColPartitions in the grid.
1006  gsearch.StartFullSearch();
1007  ColPartition* part = NULL;
1008  while ((part = gsearch.NextFullSearch()) != NULL) {
1009  if (part->type() != PT_TABLE)
1010  continue; // Consider only table partitions
1011 
1012  // Paragraph ending should have flowing text above it.
1013  ColPartition* upper_part = part->nearest_neighbor_above();
1014  if (!upper_part)
1015  continue;
1016  if (upper_part->type() != PT_FLOWING_TEXT)
1017  continue;
  // The line above must be at least twice as wide as this partition.
1018  if (upper_part->bounding_box().width() <
1019  2 * part->bounding_box().width())
1020  continue;
1021  // Check if its the last line of a paragraph.
1022  // In most cases, a paragraph ending should be left-aligned to text line
1023  // above it. Sometimes, it could be a 2 line paragraph, in which case
1024  // the line above it is indented.
1025  // To account for that, check if the partition center is to
1026  // the left of the one above it.
1027  int mid = (part->bounding_box().left() + part->bounding_box().right()) / 2;
1028  int upper_mid = (upper_part->bounding_box().left() +
1029  upper_part->bounding_box().right()) / 2;
1030  int current_spacing = 0; // spacing of the current line to margin
1031  int upper_spacing = 0; // spacing of the previous line to the margin
  // NOTE(review): the `if (...) {` selecting the left-to-right case (listing
  // line 1032) is missing here; the matching `} else {` follows below.
1033  // Left to right languages, use mid - left to figure out the distance
1034  // the middle is from the left margin.
1035  int left = MIN(part->bounding_box().left(),
1036  upper_part->bounding_box().left());
1037  current_spacing = mid - left;
1038  upper_spacing = upper_mid - left;
1039  } else {
1040  // Right to left languages, use right - mid to figure out the distance
1041  // the middle is from the right margin.
1042  int right = MAX(part->bounding_box().right(),
1043  upper_part->bounding_box().right());
1044  current_spacing = right - mid;
1045  upper_spacing = right - upper_mid;
1046  }
1047  if (current_spacing * kParagraphEndingPreviousLineRatio > upper_spacing)
1048  continue;
1049 
1050  // Paragraphs should have similar fonts.
1051  if (!part->MatchingSizes(*upper_part) ||
  // NOTE(review): the second operand of this condition and the opening
  // brace (listing lines 1052-1053) are missing here.
1054  continue;
1055  }
1056 
1057  // The last line of a paragraph should be left aligned.
1058  // TODO(nbeato): This would be untrue if the text was right aligned.
1059  // How often is that?
1060  if (part->space_to_left() >
  // NOTE(review): right-hand side of the comparison (listing line 1061) is
  // missing here.
1062  continue;
1063  // The line above it should be right aligned (assuming justified format).
1064  // Since we can't assume justified text, we compare whitespace to text.
1065  // The above line should have majority spanning text (or the current
1066  // line could have fit on the previous line). So compare
1067  // whitespace to text.
1068  if (upper_part->bounding_box().width() <
  // NOTE(review): right-hand side of the comparison (listing line 1069) is
  // missing here.
1070  continue;
1071 
1072  // Leading above the line should be less than leading below
1073  if (part->space_above() >= part->space_below() ||
1074  part->space_above() > 2 * global_median_ledding_)
1075  continue;
1076 
1077  // None of the rejection tests above fired, so it is probably text.
1078  part->clear_table_type();
1079  }
1080 }
1081 
// NOTE(review): the function signature (listing line 1082) and the
// declaration of `gsearch` (listing line 1088) are missing from this extract.
//
// Visible behaviour: finds the text partition with the greatest top
// coordinate and the one with the smallest bottom coordinate, then clears the
// table mark on each of them (they may be the same partition).
1083  // Consider top-most text colpartition as header and bottom most as footer
1084  ColPartition* header = NULL;
1085  ColPartition* footer = NULL;
1086  int max_top = MIN_INT32;
1087  int min_bottom = MAX_INT32;
1089  gsearch.StartFullSearch();
1090  ColPartition* part = NULL;
1091  while ((part = gsearch.NextFullSearch()) != NULL) {
1092  if (!part->IsTextType())
1093  continue; // Consider only text partitions
1094  int top = part->bounding_box().top();
1095  int bottom = part->bounding_box().bottom();
  // Strict comparisons: on ties the first partition returned by the search
  // is kept.
1096  if (top > max_top) {
1097  max_top = top;
1098  header = part;
1099  }
1100  if (bottom < min_bottom) {
1101  min_bottom = bottom;
1102  footer = part;
1103  }
1104  }
  // header/footer stay NULL when the grid holds no text partition.
1105  if (header)
1106  header->clear_table_type();
1107  if (footer)
1108  footer->clear_table_type();
1109 }
1110 
1111 // Mark all ColPartitions as table cells that have a table cell above
1112 // and below them
1113 // TODO(faisal): This is too aggressive at the moment. The method needs to
1114 // consider spacing and alignment as well. Detection of false alarm table cells
1115 // should also be done as part of it.
// NOTE(review): the function signature (listing line 1116) and the
// declaration of `gsearch` (listing line 1118) are missing from this extract.
//
// Two passes over the grid:
//  Pass 1 promotes a partition to table type when both its nearest neighbour
//         above and its nearest neighbour below are PT_TABLE.
//  Pass 2 demotes a PT_TABLE partition when both neighbours exist and
//         neither of them is PT_TABLE.
1117  // Iterate the ColPartitions in the grid.
1119  gsearch.StartFullSearch();
1120  ColPartition* part = NULL;
1121  while ((part = gsearch.NextFullSearch()) != NULL) {
1122  if (part->type() >= PT_TABLE || part->type() == PT_UNKNOWN)
1123  continue; // Consider only text partitions
1124  ColPartition* upper_part = part->nearest_neighbor_above();
1125  ColPartition* lower_part = part->nearest_neighbor_below();
1126  if (!upper_part || !lower_part)
1127  continue;
1128  if (upper_part->type() == PT_TABLE && lower_part->type() == PT_TABLE)
1129  part->set_table_type();
1130  }
1131 
1132  // Pass 2, do the opposite. If both the upper and lower neighbors
1133  // exist and are not tables, this probably shouldn't be a table.
1134  gsearch.StartFullSearch();
1135  part = NULL;
1136  while ((part = gsearch.NextFullSearch()) != NULL) {
1137  if (part->type() != PT_TABLE)
1138  continue; // Consider only table partitions
1139  ColPartition* upper_part = part->nearest_neighbor_above();
1140  ColPartition* lower_part = part->nearest_neighbor_below();
1141 
1142  // table can't be by itself
1143  if ((upper_part && upper_part->type() != PT_TABLE) &&
1144  (lower_part && lower_part->type() != PT_TABLE)) {
1145  part->clear_table_type();
1146  }
1147  }
1148 }
1149 
1150 // Set the type of a column segment based on the ratio of table to text cells
1151 void TableFinder::SetColumnsType(ColSegment_LIST* column_blocks) {
1152  ColSegment_IT it(column_blocks);
1153  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1154  ColSegment* seg = it.data();
1155  TBOX box = seg->bounding_box();
1156  int num_table_cells = 0;
1157  int num_text_cells = 0;
1159  rsearch(&clean_part_grid_);
1160  rsearch.SetUniqueMode(true);
1161  rsearch.StartRectSearch(box);
1162  ColPartition* part = NULL;
1163  while ((part = rsearch.NextRectSearch()) != NULL) {
1164  if (part->type() == PT_TABLE) {
1165  num_table_cells++;
1166  } else if (part->type() == PT_FLOWING_TEXT) {
1167  num_text_cells++;
1168  }
1169  }
1170  // If a column block has no text or table partition in it, it is not needed
1171  // for table detection.
1172  if (!num_table_cells && !num_text_cells) {
1173  delete it.extract();
1174  } else {
1175  seg->set_num_table_cells(num_table_cells);
1176  seg->set_num_text_cells(num_text_cells);
1177  // set column type based on the ratio of table to text cells
1178  seg->set_type();
1179  }
1180  }
1181 }
1182 
1183 // Move column blocks to grid
1184 void TableFinder::MoveColSegmentsToGrid(ColSegment_LIST *segments,
1185  ColSegmentGrid *col_seg_grid) {
1186  ColSegment_IT it(segments);
1187  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1188  ColSegment* seg = it.extract();
1189  col_seg_grid->InsertBBox(true, true, seg);
1190  }
1191 }
1192 
1193 // Merge column blocks if a split is detected due to the presence of a
1194 // table. A text block is considered split if it has multiple
1195 // neighboring blocks above/below it, and at least one of the
1196 // neighboring blocks is of table type (has a high density of table
1197 // partitions). In this case neighboring blocks in the direction
1198 // (above/below) of the table block are merged with the text block.
1199 
1200 // Comment: This method does not handle split due to a full page table
1201 // since table columns in this case do not have a text column on which
1202 // split decision can be based.
// NOTE(review): the function signature (listing line 1203) and the
// declaration types of `gsearch` (1207) and `rectsearch` (1226) are missing
// from this extract.
//
// Visible behaviour: for each COL_TEXT segment in col_seg_grid_, repeatedly
// searches a vertically padded copy of its box. Neighbours that overlap the
// padded box by at least 90%, and COL_TABLE neighbours with a major x-overlap
// that are not already contained, are absorbed into the segment, removed
// from the grid and deleted. A modified segment is re-inserted into the grid
// at the end because its box has changed.
1204  int margin = gridsize();
1205 
1206  // Iterate the Column Blocks in the grid.
1208  gsearch(&col_seg_grid_);
1209  gsearch.StartFullSearch();
1210  ColSegment* seg;
1211  while ((seg = gsearch.NextFullSearch()) != NULL) {
1212  if (seg->type() != COL_TEXT)
1213  continue; // only consider text blocks for split detection
1214  bool neighbor_found = false;
1215  bool modified = false; // Modified at least once
1216  // keep expanding current box as long as neighboring table columns
1217  // are found above or below it.
1218  do {
1219  TBOX box = seg->bounding_box();
1220  // slightly expand the search region vertically
  // (clamped to the grid's top and bottom limits)
1221  int top_range = MIN(box.top() + margin, tright().y());
1222  int bottom_range = MAX(box.bottom() - margin, bleft().y());
1223  box.set_top(top_range);
1224  box.set_bottom(bottom_range);
1225  neighbor_found = false;
1227  rectsearch(&col_seg_grid_);
1228  rectsearch.StartRectSearch(box);
1229  ColSegment* neighbor = NULL;
1230  while ((neighbor = rectsearch.NextRectSearch()) != NULL) {
1231  if (neighbor == seg)
1232  continue;
1233  const TBOX& neighbor_box = neighbor->bounding_box();
1234  // If the neighbor box significantly overlaps with the current
1235  // box (due to the expansion of the current box in the
1236  // previous iteration of this loop), remove the neighbor box
1237  // and expand the current box to include it.
  // Note: this branch does not set neighbor_found, so by itself it does not
  // trigger another pass of the do-while loop.
1238  if (neighbor_box.overlap_fraction(box) >= 0.9) {
1239  seg->InsertBox(neighbor_box);
1240  modified = true;
1241  rectsearch.RemoveBBox();
1242  gsearch.RepositionIterator();
1243  delete neighbor;
1244  continue;
1245  }
1246  // Only expand if the neighbor box is of table type
1247  if (neighbor->type() != COL_TABLE)
1248  continue;
1249  // Insert the neighbor box into the current column block
1250  if (neighbor_box.major_x_overlap(box) &&
1251  !box.contains(neighbor_box)) {
1252  seg->InsertBox(neighbor_box);
1253  neighbor_found = true;
1254  modified = true;
1255  rectsearch.RemoveBBox();
1256  gsearch.RepositionIterator();
1257  delete neighbor;
1258  }
1259  }
1260  } while (neighbor_found);
1261  if (modified) {
1262  // Because the box has changed, it has to be removed first.
1263  gsearch.RemoveBBox();
1264  col_seg_grid_.InsertBBox(true, true, seg);
1265  gsearch.RepositionIterator();
1266  }
1267  }
1268 }
1269 
1270 // Group horizontally overlapping table partitions into table columns.
1271 // TODO(faisal): This is too aggressive at the moment. The method should
1272 // consider more attributes to group table partitions together. Some common
1273 // errors are:
1274 // 1- page number is merged with a table column above it even
1275 // if there is a large vertical gap between them.
1276 // 2- column headers go on to catch one of the columns arbitrarily
1277 // 3- an isolated noise blob near page top or bottom merges with the table
1278 // column below/above it
1279 // 4- cells from two vertically adjacent tables merge together to make a
1280 // single column resulting in merging of the two tables
1281 void TableFinder::GetTableColumns(ColSegment_LIST *table_columns) {
1282  ColSegment_IT it(table_columns);
1283  // Iterate the ColPartitions in the grid.
1285  gsearch(&clean_part_grid_);
1286  gsearch.StartFullSearch();
1287  ColPartition* part;
1288  while ((part = gsearch.NextFullSearch()) != NULL) {
1289  if (part->inside_table_column() || part->type() != PT_TABLE)
1290  continue; // prevent a partition to be assigned to multiple columns
1291  const TBOX& box = part->bounding_box();
1292  ColSegment* col = new ColSegment();
1293  col->InsertBox(box);
1294  part->set_inside_table_column(true);
1295  // Start a search below the current cell to find bottom neighbours
1296  // Note: a full search will always process things above it first, so
1297  // this should be starting at the highest cell and working its way down.
1299  vsearch(&clean_part_grid_);
1300  vsearch.StartVerticalSearch(box.left(), box.right(), box.bottom());
1301  ColPartition* neighbor = NULL;
1302  bool found_neighbours = false;
1303  while ((neighbor = vsearch.NextVerticalSearch(true)) != NULL) {
1304  // only consider neighbors not assigned to any column yet
1305  if (neighbor->inside_table_column())
1306  continue;
1307  // Horizontal lines should not break the flow
1308  if (neighbor->IsHorizontalLine())
1309  continue;
1310  // presence of a non-table neighbor marks the end of current
1311  // table column
1312  if (neighbor->type() != PT_TABLE)
1313  break;
1314  // add the neighbor partition to the table column
1315  const TBOX& neighbor_box = neighbor->bounding_box();
1316  col->InsertBox(neighbor_box);
1317  neighbor->set_inside_table_column(true);
1318  found_neighbours = true;
1319  }
1320  if (found_neighbours) {
1321  it.add_after_then_move(col);
1322  } else {
1323  part->set_inside_table_column(false);
1324  delete col;
1325  }
1326  }
1327 }
1328 
1329 // Mark regions in a column that are x-bounded by the column boundaries and
1330 // y-bounded by the table columns' projection on the y-axis as table regions
1331 void TableFinder::GetTableRegions(ColSegment_LIST* table_columns,
1332  ColSegment_LIST* table_regions) {
1333  ColSegment_IT cit(table_columns);
1334  ColSegment_IT rit(table_regions);
1335  // Iterate through column blocks
1337  gsearch(&col_seg_grid_);
1338  gsearch.StartFullSearch();
1339  ColSegment* part;
1340  int page_height = tright().y() - bleft().y();
1341  ASSERT_HOST(page_height > 0);
1342  // create a bool array to hold projection on y-axis
1343  bool* table_region = new bool[page_height];
1344  while ((part = gsearch.NextFullSearch()) != NULL) {
1345  const TBOX& part_box = part->bounding_box();
1346  // reset the projection array
1347  for (int i = 0; i < page_height; i++) {
1348  table_region[i] = false;
1349  }
1350  // iterate through all table columns to find regions in the current
1351  // page column block
1352  cit.move_to_first();
1353  for (cit.mark_cycle_pt(); !cit.cycled_list(); cit.forward()) {
1354  TBOX col_box = cit.data()->bounding_box();
1355  // find intersection region of table column and page column
1356  TBOX intersection_box = col_box.intersection(part_box);
1357  // project table column on the y-axis
1358  for (int i = intersection_box.bottom(); i < intersection_box.top(); i++) {
1359  table_region[i - bleft().y()] = true;
1360  }
1361  }
1362  // set x-limits of table regions to page column width
1363  TBOX current_table_box;
1364  current_table_box.set_left(part_box.left());
1365  current_table_box.set_right(part_box.right());
1366  // go through the y-axis projection to find runs of table
1367  // regions. Each run makes one table region.
1368  for (int i = 1; i < page_height; i++) {
1369  // detect start of a table region
1370  if (!table_region[i - 1] && table_region[i]) {
1371  current_table_box.set_bottom(i + bleft().y());
1372  }
1373  // TODO(nbeato): Is it guaranteed that the last row is not a table region?
1374  // detect end of a table region
1375  if (table_region[i - 1] && !table_region[i]) {
1376  current_table_box.set_top(i + bleft().y());
1377  if (!current_table_box.null_box()) {
1378  ColSegment* seg = new ColSegment();
1379  seg->InsertBox(current_table_box);
1380  rit.add_after_then_move(seg);
1381  }
1382  }
1383  }
1384  }
1385  delete[] table_region;
1386 }
1387 
1388 // Merge table regions corresponding to tables spanning multiple columns if
1389 // there is a colpartition (horizontal ruling line or normal text) that
1390 // touches both regions.
1391 // TODO(faisal): A rare error occurs if there are two horizontally adjacent
1392 // tables with aligned ruling lines. In this case, line finder returns a
1393 // single line and hence the tables get merged together
// NOTE(review): the function signature (listing line 1394) and the
// declaration types of `gsearch` (1396) and `rectsearch` (1410) are missing
// from this extract.
//
// Visible behaviour: for each segment in table_grid_, repeatedly searches a
// band that has the segment's vertical extent and the full grid width.
// Neighbours that overlap the segment's box by at least 90%, or for which
// BelongToOneTable() is true, are absorbed into the segment, removed from the
// grid and deleted. A modified segment is re-inserted into the grid at the
// end because its box has changed.
1395  // Iterate the table regions in the grid.
1397  gsearch(&table_grid_);
1398  gsearch.StartFullSearch();
1399  ColSegment* seg = NULL;
1400  while ((seg = gsearch.NextFullSearch()) != NULL) {
1401  bool neighbor_found = false;
1402  bool modified = false; // Modified at least once
1403  do {
1404  // Start a rectangle search x-bounded by the image and y by the table
1405  const TBOX& box = seg->bounding_box();
1406  TBOX search_region(box);
1407  search_region.set_left(bleft().x());
1408  search_region.set_right(tright().x());
1409  neighbor_found = false;
1411  rectsearch(&table_grid_);
1412  rectsearch.StartRectSearch(search_region);
1413  ColSegment* neighbor = NULL;
1414  while ((neighbor = rectsearch.NextRectSearch()) != NULL) {
1415  if (neighbor == seg)
1416  continue;
1417  const TBOX& neighbor_box = neighbor->bounding_box();
1418  // Check if a neighbor box has a large overlap with the table
1419  // region. This may happen as a result of merging two table
1420  // regions in the previous iteration.
  // Note: this branch does not set neighbor_found, so by itself it does not
  // trigger another pass of the do-while loop.
1421  if (neighbor_box.overlap_fraction(box) >= 0.9) {
1422  seg->InsertBox(neighbor_box);
1423  rectsearch.RemoveBBox();
1424  gsearch.RepositionIterator();
1425  delete neighbor;
1426  modified = true;
1427  continue;
1428  }
1429  // Check if two table regions belong together based on a common
1430  // horizontal ruling line
1431  if (BelongToOneTable(box, neighbor_box)) {
1432  seg->InsertBox(neighbor_box);
1433  neighbor_found = true;
1434  modified = true;
1435  rectsearch.RemoveBBox();
1436  gsearch.RepositionIterator();
1437  delete neighbor;
1438  }
1439  }
1440  } while (neighbor_found);
1441  if (modified) {
1442  // Because the box has changed, it has to be removed first.
1443  gsearch.RemoveBBox();
1444  table_grid_.InsertBBox(true, true, seg);
1445  gsearch.RepositionIterator();
1446  }
1447  }
1448 }
1449 
1450 // Decide if two table regions belong to one table based on a common
1451 // horizontal ruling line or another colpartition
1452 bool TableFinder::BelongToOneTable(const TBOX &box1, const TBOX &box2) {
1453  // Check the obvious case. Most likely not true because overlapping boxes
1454  // should already be merged, but seems like a good thing to do in case things
1455  // change.
1456  if (box1.overlap(box2))
1457  return true;
1458  // Check for ColPartitions spanning both table regions
1459  TBOX bbox = box1.bounding_union(box2);
1460  // Start a rect search on bbox
1462  rectsearch(&clean_part_grid_);
1463  rectsearch.StartRectSearch(bbox);
1464  ColPartition* part = NULL;
1465  while ((part = rectsearch.NextRectSearch()) != NULL) {
1466  const TBOX& part_box = part->bounding_box();
1467  // return true if a colpartition spanning both table regions is found
1468  if (part_box.overlap(box1) && part_box.overlap(box2) &&
1469  !part->IsImageType())
1470  return true;
1471  }
1472  return false;
1473 }
1474 
1475 // Adjust table boundaries by:
1476 // - building a tight bounding box around all ColPartitions contained in it.
1477 // - expanding table boundaries to include all colpartitions that overlap the
1478 // table by more than half of their area
1479 // - expanding table boundaries to include nearby horizontal rule lines
1480 // - expanding table vertically to include left out column headers
1481 // TODO(faisal): Expansion of table boundaries is quite aggressive. It usually
1482 // makes following errors:
1483 // 1- horizontal lines consisting of underlines are included in the table if
1484 // they are close enough
1485 // 2- horizontal lines originating from noise tend to get merged with a table
1486 // near the top of the page
1487 // 3- the criteria for including horizontal lines is very generous. Many times
1488 // horizontal lines separating headers and footers get merged with a
1489 // single-column table in a multi-column page thereby including text
1490 // from the neighboring column inside the table
1491 // 4- the criteria for including left out column headers also tends to
1492 // occasionally include text-lines above the tables, typically from
1493 // table caption
// NOTE(review): the function signature (listing line 1494) and the
// declaration of `gsearch` (listing line 1498) are missing from this extract.
//
// Visible behaviour: every segment in table_grid_ is grown with
// GrowTableBox(), removed from the grid and deleted. If the grown box is not
// null, a new segment holding it is parked in a temporary list. After the
// pass the grid is cleared and the parked segments are inserted into it.
1495  // Iterate the table regions in the grid
1496  ColSegment_CLIST adjusted_tables;
1497  ColSegment_C_IT it(&adjusted_tables);
1499  gsearch.StartFullSearch();
1500  ColSegment* table = NULL;
1501  while ((table = gsearch.NextFullSearch()) != NULL) {
1502  const TBOX& table_box = table->bounding_box();
1503  TBOX grown_box = table_box;
1504  GrowTableBox(table_box, &grown_box);
1505  // To prevent a table from expanding again, do not insert the
1506  // modified box back to the grid. Instead move it to a list and
1507  // remove it from the grid. The list is moved later back to the grid.
1508  if (!grown_box.null_box()) {
1509  ColSegment* col = new ColSegment();
1510  col->InsertBox(grown_box);
1511  it.add_after_then_move(col);
1512  }
  // The original segment is dropped whether or not a replacement was made.
1513  gsearch.RemoveBBox();
1514  delete table;
1515  }
1516  // clear table grid to move final tables in it
1517  // TODO(nbeato): table_grid_ should already be empty. The above loop
1518  // removed everything. Maybe just assert it is empty?
1519  table_grid_.Clear();
1520  it.move_to_first();
1521  // move back final tables to table_grid_
1522  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1523  ColSegment* seg = it.extract();
1524  table_grid_.InsertBBox(true, true, seg);
1525  }
1526 }
1527 
1528 void TableFinder::GrowTableBox(const TBOX& table_box, TBOX* result_box) {
1529  // TODO(nbeato): The growing code is a bit excessive right now.
1530  // By removing these lines, the partitions considered need
1531  // to have some overlap or be special cases. These lines could
1532  // be added again once a check is put in place to make sure that
1533  // growing tables don't stomp on a lot of non-table partitions.
1534 
1535  // search for horizontal ruling lines within the vertical margin
1536  // int vertical_margin = kRulingVerticalMargin * gridsize();
1537  TBOX search_box = table_box;
1538  // int top = MIN(search_box.top() + vertical_margin, tright().y());
1539  // int bottom = MAX(search_box.bottom() - vertical_margin, bleft().y());
1540  // search_box.set_top(top);
1541  // search_box.set_bottom(bottom);
1542 
1543  GrowTableToIncludePartials(table_box, search_box, result_box);
1544  GrowTableToIncludeLines(table_box, search_box, result_box);
1545  IncludeLeftOutColumnHeaders(result_box);
1546 }
1547 
1548 // Grow a table by increasing the size of the box to include
1549 // partitions with significant overlap with the table.
// NOTE(review): the first line of the signature (listing line 1550) and the
// second alternative of the grid selection (listing line 1557) are missing
// from this extract.
//
// Visible behaviour: searches `search_range` in two grids in turn and unions
// into *result_box the box of every non-image partition whose
// overlap_fraction with table_box exceeds kMinOverlapWithTable.
1551  const TBOX& search_range,
1552  TBOX* result_box) {
1553  // Rulings are in a different grid, so search 2 grids for rulings, text,
1554  // and table partitions that are not entirely within the new box.
1555  for (int i = 0; i < 2; ++i) {
1556  ColPartitionGrid* grid = (i == 0) ? &fragmented_text_grid_ :
  // NOTE(review): the grid used when i == 1 (listing line 1557) is missing
  // here.
1558  ColPartitionGridSearch rectsearch(grid);
1559  rectsearch.StartRectSearch(search_range);
1560  ColPartition* part = NULL;
1561  while ((part = rectsearch.NextRectSearch()) != NULL) {
1562  // Only include text and table types.
1563  if (part->IsImageType())
1564  continue;
1565  const TBOX& part_box = part->bounding_box();
1566  // Include partition in the table if more than half of it
1567  // is covered by the table
1568  if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
1569  *result_box = result_box->bounding_union(part_box);
1570  continue;
1571  }
1572  }
1573  }
1574 }
1575 
1576 // Grow a table by expanding to the extents of significantly
1577 // overlapping lines.
// NOTE(review): the first line of the signature (listing line 1578) and the
// declaration of `rsearch` (listing line 1581) are missing from this extract.
//
// Visible behaviour: for every line-type partition found in `search_range`
// that is not already contained in *result_box, the partition's box is
// unioned into *result_box when HLineBelongsToTable() accepts it.
1579  const TBOX& search_range,
1580  TBOX* result_box) {
1582  rsearch.SetUniqueMode(true);
1583  rsearch.StartRectSearch(search_range);
1584  ColPartition* part = NULL;
1585  while ((part = rsearch.NextRectSearch()) != NULL) {
1586  // TODO(nbeato) This should also do vertical, but column
1587  // boundaries are breaking things. This function needs to be
1588  // updated to allow vertical lines as well.
1589  if (!part->IsLineType())
1590  continue;
1591  // Avoid the following function call if the result of the
1592  // function is irrelevant.
1593  const TBOX& part_box = part->bounding_box();
1594  if (result_box->contains(part_box))
1595  continue;
1596  // Include a partially overlapping horizontal line only if the
1597  // extra ColPartitions that will be included due to expansion
1598  // have large side spacing w.r.t. columns containing them.
1599  if (HLineBelongsToTable(*part, table_box))
1600  *result_box = result_box->bounding_union(part_box);
1601  // TODO(nbeato): Vertical
1602  }
1603 }
1604 
1605 // Checks whether the horizontal line belong to the table by looking at the
1606 // side spacing of extra ColPartitions that will be included in the table
1607 // due to expansion
// NOTE(review): the first line of the signature (listing line 1608) and the
// second alternative of the grid selection (listing line 1634) are missing
// from this extract.
//
// Visible behaviour: returns false unless `part` is a horizontal line with a
// major x-overlap with table_box. Otherwise it counts the non-image
// partitions inside the union of the line and the table that are not already
// mostly covered by the table, and returns true when more than half of them
// (integer division) score on the right side or on the left side.
1609  const TBOX& table_box) {
1610  if (!part.IsHorizontalLine())
1611  return false;
1612  const TBOX& part_box = part.bounding_box();
1613  if (!part_box.major_x_overlap(table_box))
1614  return false;
1615  // Do not consider top-most horizontal line since it usually
1616  // originates from noise.
1617  // TODO(nbeato): I had to comment this out because the ruling grid doesn't
1618  // have neighbors solved.
1619  // if (!part.nearest_neighbor_above())
1620  // return false;
1621  const TBOX bbox = part_box.bounding_union(table_box);
1622  // In the "unioned table" box (the table extents expanded by the line),
1623  // keep track of how many partitions have significant padding to the left
1624  // and right. If more than half of the partitions covered by the new table
1625  // have significant spacing, the line belongs to the table and the table
1626  // grows to include all of the partitions.
1627  int num_extra_partitions = 0;
1628  int extra_space_to_right = 0;
1629  int extra_space_to_left = 0;
1630  // Rulings are in a different grid, so search 2 grids for rulings, text,
1631  // and table partitions that are introduced by the new box.
1632  for (int i = 0; i < 2; ++i) {
1633  ColPartitionGrid* grid = (i == 0) ? &clean_part_grid_ :
  // NOTE(review): the grid used when i == 1 (listing line 1634) is missing
  // here.
1635  // Start a rect search on bbox
1636  ColPartitionGridSearch rectsearch(grid);
1637  rectsearch.SetUniqueMode(true);
1638  rectsearch.StartRectSearch(bbox);
1639  ColPartition* extra_part = NULL;
1640  while ((extra_part = rectsearch.NextRectSearch()) != NULL) {
1641  // ColPartition already in table
1642  const TBOX& extra_part_box = extra_part->bounding_box();
1643  if (extra_part_box.overlap_fraction(table_box) > kMinOverlapWithTable)
1644  continue;
1645  // Non-text ColPartitions do not contribute
1646  if (extra_part->IsImageType())
1647  continue;
1648  // Consider this partition.
1649  num_extra_partitions++;
1650  // presence of a table cell is a strong hint, so just increment the scores
1651  // without looking at the spacing.
1652  if (extra_part->type() == PT_TABLE || extra_part->IsLineType()) {
1653  extra_space_to_right++;
1654  extra_space_to_left++;
1655  continue;
1656  }
  // The threshold is derived from the median size of the line partition
  // `part`, not of extra_part.
1657  int space_threshold = kSideSpaceMargin * part.median_size();
1658  if (extra_part->space_to_right() > space_threshold)
1659  extra_space_to_right++;
1660  if (extra_part->space_to_left() > space_threshold)
1661  extra_space_to_left++;
1662  }
1663  }
1664  // tprintf("%d %d %d\n",
1665  // num_extra_partitions,extra_space_to_right,extra_space_to_left);
  // With no extra partitions both counters are 0 and the result is false.
1666  return (extra_space_to_right > num_extra_partitions / 2) ||
1667  (extra_space_to_left > num_extra_partitions / 2);
1668 }
1669 
1670 // Look for isolated column headers above the given table box and
1671 // include them in the table
// NOTE(review): the function signature (listing line 1672) and the
// declaration of `vsearch` (listing line 1674) are missing from this extract.
//
// Visible behaviour: walks the partitions returned by a vertical search that
// starts at the top of *table_box. PT_TABLE and line partitions raise the
// top of *table_box to their own top. The walk stops at a partition lying
// more than max_distance above the current table top, or at a second
// non-table partition that lacks a major y-overlap with the remembered one.
1673  // Start a search above the current table to look for column headers
1675  vsearch.StartVerticalSearch(table_box->left(), table_box->right(),
1676  table_box->top());
1677  ColPartition* neighbor = NULL;
1678  ColPartition* previous_neighbor = NULL;
1679  while ((neighbor = vsearch.NextVerticalSearch(false)) != NULL) {
1680  // Max distance to find a table heading.
1681  const int max_distance = kMaxColumnHeaderDistance *
1682  neighbor->median_size();
  // Re-read on every iteration because the loop may raise the table top.
1683  int table_top = table_box->top();
1684  const TBOX& box = neighbor->bounding_box();
1685  // Do not continue if the next box is way above
1686  if (box.bottom() - table_top > max_distance)
1687  break;
1688  // Unconditionally include partitions of type TABLE or LINE
1689  // TODO(faisal): add some reasonable conditions here
1690  if (neighbor->type() == PT_TABLE || neighbor->IsLineType()) {
1691  table_box->set_top(box.top());
1692  previous_neighbor = NULL;
1693  continue;
1694  }
1695  // If there are two text partitions, one above the other, without a table
1696  // cell on their left or right side, consider them a barrier and quit
1697  if (previous_neighbor == NULL) {
1698  previous_neighbor = neighbor;
1699  } else {
1700  const TBOX& previous_box = previous_neighbor->bounding_box();
1701  if (!box.major_y_overlap(previous_box))
1702  break;
1703  }
1704  }
1705 }
1706 
1707 // Remove false alarms consisting of a single column based on their
1708 // projection on the x-axis. Projection of a real table on the x-axis
1709 // should have at least one zero-valley larger than the global median
1710 // x-height of the page.
// Every table in table_grid_ for which GapInXProjection() returns false is
// removed from the grid and deleted.
// NOTE(review): the function signature line (listing line 1711) and the type
// lines of the `table_search` and `rectsearch` declarations (listing lines
// 1717 and 1728) are absent from this listing; confirm against the original.
1712  int page_width = tright().x() - bleft().x();
1713  ASSERT_HOST(page_width > 0);
1714  // create an integer array to hold projection on x-axis
1715  int* table_xprojection = new int[page_width];
1716  // Iterate through all tables in the table grid
1718  table_search(&table_grid_);
1719  table_search.StartFullSearch();
1720  ColSegment* table;
1721  while ((table = table_search.NextFullSearch()) != NULL) {
1722  TBOX table_box = table->bounding_box();
1723  // reset the projection array
1724  for (int i = 0; i < page_width; i++) {
1725  table_xprojection[i] = 0;
1726  }
1727  // Start a rect search on table_box
1729  rectsearch(&clean_part_grid_);
1730  rectsearch.SetUniqueMode(true);
1731  rectsearch.StartRectSearch(table_box);
1732  ColPartition* part;
1733  while ((part = rectsearch.NextRectSearch()) != NULL) {
1734  if (!part->IsTextType())
1735  continue; // Do not consider non-text partitions
1736  if (part->flow() == BTFT_LEADER)
1737  continue; // Assume leaders are in tables
1738  TBOX part_box = part->bounding_box();
1739  // Do not consider partitions partially covered by the table
1740  if (part_box.overlap_fraction(table_box) < kMinOverlapWithTable)
1741  continue;
1742  BLOBNBOX_CLIST* part_boxes = part->boxes();
1743  BLOBNBOX_C_IT pit(part_boxes);
1744 
1745  // Make sure overlapping blobs don't artificially inflate the number
1746  // of rows in the table. This happens frequently with things such as
1747  // decimals and split characters. Do this by assuming the column
1748  // partition is sorted mostly left to right and just clip
1749  // bounding boxes by the previous box's extent.
  // Declared inside the partition loop, so the clip position starts
  // again at 0 for every partition.
1750  int next_position_to_write = 0;
1751 
1752  for (pit.mark_cycle_pt(); !pit.cycled_list(); pit.forward()) {
1753  BLOBNBOX *pblob = pit.data();
1754  // ignore blob height for the purpose of projection since we
1755  // are only interested in finding valleys
1756  int xstart = pblob->bounding_box().left();
1757  int xend = pblob->bounding_box().right();
1758 
1759  xstart = MAX(xstart, next_position_to_write);
  // Bins are indexed relative to the left edge of the grid.
  // NOTE(review): the index is not range-checked; this assumes every blob
  // lies within [bleft().x(), tright().x()) -- confirm.
1760  for (int i = xstart; i < xend; i++)
1761  table_xprojection[i - bleft().x()]++;
1762  next_position_to_write = xend;
1763  }
1764  }
1765  // Find largest valley between two reasonable peaks in the table
1766  if (!GapInXProjection(table_xprojection, page_width)) {
1767  table_search.RemoveBBox();
1768  delete table;
1769  }
1770  }
1771  delete[] table_xprojection;
1772 }
1773 
1774 // Return true if at least one gap larger than the global x-height
1775 // exists in the horizontal projection
1776 bool TableFinder::GapInXProjection(int* xprojection, int length) {
1777  // Find peak value of the histogram
1778  int peak_value = 0;
1779  for (int i = 0; i < length; i++) {
1780  if (xprojection[i] > peak_value) {
1781  peak_value = xprojection[i];
1782  }
1783  }
1784  // Peak value represents the maximum number of horizontally
1785  // overlapping colpartitions, so this can be considered as the
1786  // number of rows in the table
1787  if (peak_value < kMinRowsInTable)
1788  return false;
1789  double projection_threshold = kSmallTableProjectionThreshold * peak_value;
1790  if (peak_value >= kLargeTableRowCount)
1791  projection_threshold = kLargeTableProjectionThreshold * peak_value;
1792  // Threshold the histogram
1793  for (int i = 0; i < length; i++) {
1794  xprojection[i] = (xprojection[i] >= projection_threshold) ? 1 : 0;
1795  }
1796  // Find the largest run of zeros between two ones
1797  int largest_gap = 0;
1798  int run_start = -1;
1799  for (int i = 1; i < length; i++) {
1800  // detect start of a run of zeros
1801  if (xprojection[i - 1] && !xprojection[i]) {
1802  run_start = i;
1803  }
1804  // detect end of a run of zeros and update the value of largest gap
1805  if (run_start != -1 && !xprojection[i - 1] && xprojection[i]) {
1806  int gap = i - run_start;
1807  if (gap > largest_gap)
1808  largest_gap = gap;
1809  run_start = -1;
1810  }
1811  }
1812  return largest_gap > kMaxXProjectionGapFactor * global_median_xheight_;
1813 }
1814 
1815 // Given the location of a table "guess", try to overlay a cellular
1816 // grid in the location, adjusting the boundaries.
1817 // TODO(nbeato): Falsely introduces:
1818 // -headers/footers (not any worse, too much overlap destroys cells)
1819 // -page numbers (not worse, included because maximize margins)
1820 // -equations (nicely fit into a cellular grid, but more sparsely)
1821 // -figures (random text box, also sparse)
1822 // -small left-aligned text areas with overlapping positioned whitespace
1823 // (rejected before)
1824 // Overall, this just needs some more work.
// Each table is taken out of the grid and passed to a TableRecognizer.
// Tables it accepts get the recognized bounding box and are put back into
// table_grid_ at the end; the others are deleted.
// NOTE(review): several lines are absent from this listing: the function
// signature (listing line 1825), two lines inside the textord_show_tables
// branch (1829-1830), one line between recognizer.Init() and set_text_grid()
// (1837), and the declaration of `gsearch` (1846). Confirm against the
// original file.
1826  ScrollView* table_win = NULL;
1827  if (textord_show_tables) {
1828  table_win = MakeWindow(0, 0, "Table Structure");
1831  // table_grid_.DisplayBoxes(table_win);
1832  }
1833 
1834 
1835  TableRecognizer recognizer;
1836  recognizer.Init();
1838  recognizer.set_text_grid(&fragmented_text_grid_);
1839  recognizer.set_max_text_height(global_median_xheight_ * 2.0);
1840  recognizer.set_min_height(1.5 * gridheight());
1841  // Loop over all of the tables and try to fit them.
1842  // Store the good tables here.
1843  ColSegment_CLIST good_tables;
1844  ColSegment_C_IT good_it(&good_tables);
1845 
1847  gsearch.StartFullSearch();
1848  ColSegment* found_table = NULL;
1849  while ((found_table = gsearch.NextFullSearch()) != NULL) {
1850  gsearch.RemoveBBox();
1851 
1852  // The goal is to make the tables persistent in a list.
1853  // When that happens, this will move into the search loop.
1854  const TBOX& found_box = found_table->bounding_box();
1855  StructuredTable* table_structure = recognizer.RecognizeTable(found_box);
1856 
1857  // Process a table. Good tables are inserted into the grid again later on
1858  // We can't change boxes in the grid while it is running a search.
1859  if (table_structure != NULL) {
1860  if (textord_show_tables) {
1861  table_structure->Display(table_win, ScrollView::LIME_GREEN);
1862  }
1863  found_table->set_bounding_box(table_structure->bounding_box());
  // Only the bounding box is kept; the structure itself is released here.
1864  delete table_structure;
1865  good_it.add_after_then_move(found_table);
1866  } else {
  // No structure was returned for this guess: drop the table.
1867  delete found_table;
1868  }
1869  }
1870  // TODO(nbeato): MERGE!! There is awesome info now available for merging.
1871 
1872  // At this point, the grid is empty. We can safely insert the good tables
1873  // back into grid.
1874  for (good_it.mark_cycle_pt(); !good_it.cycled_list(); good_it.forward())
1875  table_grid_.InsertBBox(true, true, good_it.extract());
1876 }
1877 
1878 // Displays the column segments in some window.
// Draws the bounding box of every segment in `segments` as an unfilled
// rectangle in `color` on `win`, then refreshes the window. The whole body is
// compiled out when GRAPHICS_DISABLED is defined.
// NOTE(review): the first line of the signature (listing line 1879) is absent
// from this listing; the cross-reference at the end of the file gives it as
// DisplayColSegments(ScrollView *win, ColSegment_LIST *cols,
// ScrollView::Color color).
1880  ColSegment_LIST *segments,
1881  ScrollView::Color color) {
1882 #ifndef GRAPHICS_DISABLED
  // Pen and brush are set once, before the loop.
1883  win->Pen(color);
1884  win->Brush(ScrollView::NONE);
1885  ColSegment_IT it(segments);
1886  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1887  ColSegment* col = it.data();
1888  const TBOX& box = col->bounding_box();
1889  int left_x = box.left();
1890  int right_x = box.right();
1891  int top_y = box.top();
1892  int bottom_y = box.bottom();
1893  win->Rectangle(left_x, bottom_y, right_x, top_y);
1894  }
1895  win->UpdateWindow();
1896 #endif
1897 }
1898 
1900  ScrollView::Color color) {
// Draws the bounding box of every ColSegment found by a full search of `grid`
// as an unfilled rectangle in `color` on `win`, then refreshes the window.
// The whole body is compiled out when GRAPHICS_DISABLED is defined.
// NOTE(review): the first line of the signature (listing line 1899) and the
// type line of the `gsearch` declaration (listing line 1903) are absent from
// this listing; the cross-reference at the end of the file gives the
// signature as DisplayColSegmentGrid(ScrollView *win, ColSegmentGrid *grid,
// ScrollView::Color color).
1901 #ifndef GRAPHICS_DISABLED
1902  // Iterate the ColSegments in the grid.
1904  gsearch(grid);
1905  gsearch.StartFullSearch();
1906  ColSegment* seg = NULL;
1907  while ((seg = gsearch.NextFullSearch()) != NULL) {
1908  const TBOX& box = seg->bounding_box();
1909  int left_x = box.left();
1910  int right_x = box.right();
1911  int top_y = box.top();
1912  int bottom_y = box.bottom();
1913  win->Brush(ScrollView::NONE);
1914  win->Pen(color);
1915  win->Rectangle(left_x, bottom_y, right_x, top_y);
1916  }
1917  win->UpdateWindow();
1918 #endif
1919 }
1920 
1921 // Displays the colpartitions using a new coloring on an existing window.
1922 // Note: This method is only for debug purpose during development and
1923 // would not be part of checked in code
// Partitions whose type() is PT_TABLE are outlined in table_color, all others
// in default_color. The whole body is compiled out when GRAPHICS_DISABLED is
// defined.
// NOTE(review): the first line of the signature (listing line 1924) and the
// type line of the `gsearch` declaration (listing line 1931) are absent from
// this listing; the cross-reference at the end of the file lists the first
// parameter as ScrollView *win.
1925  ColPartitionGrid* grid,
1926  ScrollView::Color default_color,
1927  ScrollView::Color table_color) {
1928 #ifndef GRAPHICS_DISABLED
1929  ScrollView::Color color = default_color;
1930  // Iterate the ColPartitions in the grid.
1932  gsearch(grid);
1933  gsearch.StartFullSearch();
1934  ColPartition* part = NULL;
1935  while ((part = gsearch.NextFullSearch()) != NULL) {
  // Chosen afresh for every partition.
1936  color = default_color;
1937  if (part->type() == PT_TABLE)
1938  color = table_color;
1939 
1940  const TBOX& box = part->bounding_box();
1941  int left_x = box.left();
1942  int right_x = box.right();
1943  int top_y = box.top();
1944  int bottom_y = box.bottom();
1945  win->Brush(ScrollView::NONE);
1946  win->Pen(color);
1947  win->Rectangle(left_x, bottom_y, right_x, top_y);
1948  }
1949  win->UpdateWindow();
1950 #endif
1951 }
1953  ColPartitionGrid* grid,
1954  ScrollView::Color default_color) {
  // Three-argument form: forwards to the four-argument DisplayColPartitions
  // with ScrollView::YELLOW as the table colour.
  // NOTE(review): the first line of this signature (listing line 1952) is
  // absent from this listing; `win` is presumably its first parameter.
1955  DisplayColPartitions(win, grid, default_color, ScrollView::YELLOW);
1956 }
1957 
1959  ScrollView* win,
// For every partition in `grid`, draws a line in `color` from the centre of
// its bounding box to the centre of its nearest neighbour above and to the
// centre of its nearest neighbour below, whenever those neighbours are set.
// The whole body is compiled out when GRAPHICS_DISABLED is defined.
// NOTE(review): the first line of the signature (listing line 1958) and the
// type line of the `gsearch` declaration (listing line 1964) are absent from
// this listing; the cross-reference at the end of the file names the function
// DisplayColPartitionConnections.
1960  ColPartitionGrid* grid,
1961  ScrollView::Color color) {
1962 #ifndef GRAPHICS_DISABLED
1963  // Iterate the ColPartitions in the grid.
1965  gsearch(grid);
1966  gsearch.StartFullSearch();
1967  ColPartition* part = NULL;
1968  while ((part = gsearch.NextFullSearch()) != NULL) {
1969  const TBOX& box = part->bounding_box();
1970  int left_x = box.left();
1971  int right_x = box.right();
1972  int top_y = box.top();
1973  int bottom_y = box.bottom();
1974 
  // Link to the neighbour above, if one is recorded.
1975  ColPartition* upper_part = part->nearest_neighbor_above();
1976  if (upper_part) {
1977  const TBOX& upper_box = upper_part->bounding_box();
1978  int mid_x = (left_x + right_x) / 2;
1979  int mid_y = (top_y + bottom_y) / 2;
1980  int other_x = (upper_box.left() + upper_box.right()) / 2;
1981  int other_y = (upper_box.top() + upper_box.bottom()) / 2;
1982  win->Brush(ScrollView::NONE);
1983  win->Pen(color);
1984  win->Line(mid_x, mid_y, other_x, other_y);
1985  }
  // Link to the neighbour below, if one is recorded.
1986  ColPartition* lower_part = part->nearest_neighbor_below();
1987  if (lower_part) {
1988  const TBOX& lower_box = lower_part->bounding_box();
1989  int mid_x = (left_x + right_x) / 2;
1990  int mid_y = (top_y + bottom_y) / 2;
1991  int other_x = (lower_box.left() + lower_box.right()) / 2;
1992  int other_y = (lower_box.top() + lower_box.bottom()) / 2;
1993  win->Brush(ScrollView::NONE);
1994  win->Pen(color);
1995  win->Line(mid_x, mid_y, other_x, other_y);
1996  }
1997  }
1998  win->UpdateWindow();
1999 #endif
2000 }
2001 
2002 
2003 // Write debug image and text file.
2004 // Note: This method is only for debug purpose during development and
2005 // would not be part of checked in code
2006 void TableFinder::WriteToPix(const FCOORD& reskew) {
2007  // Input file must be named test1.tif
2008  PIX* pix = pixRead("test1.tif");
2009  if (!pix) {
2010  tprintf("Input file test1.tif not found.\n");
2011  return;
2012  }
2013  int img_height = pixGetHeight(pix);
2014  int img_width = pixGetWidth(pix);
2015  // Maximum number of text or table partitions
2016  int num_boxes = 10;
2017  BOXA* text_box_array = boxaCreate(num_boxes);
2018  BOXA* table_box_array = boxaCreate(num_boxes);
2020  gsearch(&clean_part_grid_);
2021  gsearch.StartFullSearch();
2022  ColPartition* part;
2023  // load colpartitions into text_box_array and table_box_array
2024  while ((part = gsearch.NextFullSearch()) != NULL) {
2025  TBOX box = part->bounding_box();
2026  box.rotate_large(reskew);
2027  BOX* lept_box = boxCreate(box.left(), img_height - box.top(),
2028  box.right() - box.left(),
2029  box.top() - box.bottom());
2030  if (part->type() == PT_TABLE)
2031  boxaAddBox(table_box_array, lept_box, L_INSERT);
2032  else
2033  boxaAddBox(text_box_array, lept_box, L_INSERT);
2034  }
2035  // draw colpartitions on the output image
2036  PIX* out = pixDrawBoxa(pix, text_box_array, 3, 0xff000000);
2037  out = pixDrawBoxa(out, table_box_array, 3, 0x0000ff00);
2038 
2039  BOXA* table_array = boxaCreate(num_boxes);
2040  // text file containing detected table bounding boxes
2041  FILE* fptr = fopen("tess-table.txt", "wb");
2043  table_search(&table_grid_);
2044  table_search.StartFullSearch();
2045  ColSegment* table;
2046  // load table boxes to table_array and write them to text file as well
2047  while ((table = table_search.NextFullSearch()) != NULL) {
2048  TBOX box = table->bounding_box();
2049  box.rotate_large(reskew);
2050  // Since deskewing introduces negative coordinates, reskewing
2051  // might not completely recover from that since both steps enlarge
2052  // the actual box. Hence a box that undergoes deskewing/reskewing
2053  // may go out of image boundaries. Crop a table box if needed to
2054  // contain it inside the image dimensions.
2055  box = box.intersection(TBOX(0, 0, img_width - 1, img_height - 1));
2056  BOX* lept_box = boxCreate(box.left(), img_height - box.top(),
2057  box.right() - box.left(),
2058  box.top() - box.bottom());
2059  boxaAddBox(table_array, lept_box, L_INSERT);
2060  fprintf(fptr, "%d %d %d %d TABLE\n", box.left(),
2061  img_height - box.top(), box.right(), img_height - box.bottom());
2062  }
2063  fclose(fptr);
2064  // paint table boxes on the debug image
2065  out = pixDrawBoxa(out, table_array, 5, 0x7fff0000);
2066 
2067  pixWrite("out.png", out, IFF_PNG);
2068  // memory cleanup
2069  boxaDestroy(&text_box_array);
2070  boxaDestroy(&table_box_array);
2071  boxaDestroy(&table_array);
2072  pixDestroy(&pix);
2073  pixDestroy(&out);
2074 }
2075 
2076 // Merge all colpartitions in table regions to make them a single
2077 // colpartition and revert types of isolated table cells not
2078 // assigned to any table to their original types.
// NOTE(review): the first line of the signature (listing line 2079) and the
// type lines of the `gsearch`, `table_search` and `rectsearch` declarations
// (listing lines 2084, 2096 and 2103) are absent from this listing; the
// cross-reference at the end of the file gives the signature as
// MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns,
// WidthCallback *width_cb).
2080  ColPartitionSet** all_columns,
2081  WidthCallback* width_cb) {
2082  // Since we have table blocks already, remove table tags from all
2083  // colpartitions
2085  gsearch(grid);
2086  gsearch.StartFullSearch();
2087  ColPartition* part = NULL;
2088 
2089  while ((part = gsearch.NextFullSearch()) != NULL) {
2090  if (part->type() == PT_TABLE) {
2091  part->clear_table_type();
2092  }
2093  }
2094  // Now make a single colpartition out of each table block and remove
2095  // all colpartitions contained within a table
2097  table_search(&table_grid_);
2098  table_search.StartFullSearch();
2099  ColSegment* table;
2100  while ((table = table_search.NextFullSearch()) != NULL) {
2101  const TBOX& table_box = table->bounding_box();
2102  // Start a rect search on table_box
2104  rectsearch(grid);
2105  rectsearch.StartRectSearch(table_box);
  // This `part` shadows the one declared for the first loop above.
2106  ColPartition* part;
  // The first qualifying partition; later ones are absorbed into it.
2107  ColPartition* table_partition = NULL;
2108  while ((part = rectsearch.NextRectSearch()) != NULL) {
2109  // Do not consider image partitions
2110  if (!part->IsTextType())
2111  continue;
2112  TBOX part_box = part->bounding_box();
2113  // Include partition in the table if more than half of it
2114  // is covered by the table
2115  if (part_box.overlap_fraction(table_box) > kMinOverlapWithTable) {
2116  rectsearch.RemoveBBox();
2117  if (table_partition) {
2118  table_partition->Absorb(part, width_cb);
2119  } else {
2120  table_partition = part;
2121  }
2122  }
2123  }
2124  // Insert table colpartition back into grid
2125  if (table_partition) {
2126  // To match the columns used when transforming to blocks, the new table
2127  // partition must have its first and last column set at the grid y that
2128  // corresponds to its bottom.
  // This table_box is the merged partition's box and shadows the
  // segment's table_box declared at the top of the loop.
2129  const TBOX& table_box = table_partition->bounding_box();
2130  int grid_x, grid_y;
2131  grid->GridCoords(table_box.left(), table_box.bottom(), &grid_x, &grid_y);
2132  table_partition->SetPartitionType(resolution_, all_columns[grid_y]);
2133  table_partition->set_table_type();
2134  table_partition->set_blob_type(BRT_TEXT);
2135  table_partition->set_flow(BTFT_CHAIN);
2136  table_partition->SetBlobTypes();
2137  grid->InsertBBox(true, true, table_partition);
2138  }
2139  }
2140 }
2141 
2145  : ELIST_LINK(),
  // Member initializers: both cell counters start at zero and the segment
  // type starts as COL_UNKNOWN.
  // NOTE(review): the lines before this initializer list (listing lines
  // 2142-2144, which would include the constructor signature) are absent
  // from this listing.
2146  num_table_cells_(0),
2147  num_text_cells_(0),
2148  type_(COL_UNKNOWN) {
2149 }
// NOTE(review): listing line 2150 is absent, so only the closing brace of the
// next definition survives below -- presumably an empty destructor; confirm.
2151 }
2152 
2153 // Provides a color for BBGrid to draw the rectangle.
// Returns the entry of a local colour table selected by type_.
// NOTE(review): the signature line (listing line 2154) and the initializer
// rows of kBoxColors (listing lines 2156-2159) are absent from this listing;
// the cross-reference at the end of the file gives the signature as
// ScrollView::Color BoxColor() const.
// NOTE(review): the table is sized by PT_COUNT but indexed by type_, which
// this file assigns COL_* values -- confirm the table covers every value
// type_ can hold.
2155  const ScrollView::Color kBoxColors[PT_COUNT] = {
2160  };
2161  return kBoxColors[type_];
2162 }
2163 
2164 // Insert a box into this column segment
2165 void ColSegment::InsertBox(const TBOX& other) {
2166  bounding_box_ = bounding_box_.bounding_union(other);
2167 }
2168 
2169 // Set column segment type based on the ratio of text and table partitions
2170 // in it.
// NOTE(review): the signature line (listing line 2171) is absent from this
// listing; confirm the function name against the original file.
  // Table cells outnumber text cells by more than the threshold factor.
2172  if (num_table_cells_ > kTableColumnThreshold * num_text_cells_)
2173  type_ = COL_TABLE;
  // Otherwise text wins only when it strictly outnumbers table cells.
2174  else if (num_text_cells_ > num_table_cells_)
2175  type_ = COL_TEXT;
  // Everything else, including equal counts, is treated as mixed.
2176  else
2177  type_ = COL_MIXED;
2178 }
2179 
2180 } // namespace tesseract.
void set_line_grid(ColPartitionGrid *lines)
Definition: tablerecog.cpp:723
void set_bottom(int y)
Definition: rect.h:64
const double kAllowTextWidth
Definition: tablefind.cpp:53
const TBOX & bounding_box() const
Definition: tablerecog.cpp:110
void GrowTableBox(const TBOX &table_box, TBOX *result_box)
Definition: tablefind.cpp:1528
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
const double kAllowTextArea
Definition: tablefind.cpp:54
const double kAllowBlobHeight
Definition: tablefind.cpp:59
ColSegmentGrid col_seg_grid_
Definition: tablefind.h:426
const double kSplitPartitionSize
Definition: tablefind.cpp:47
const TBOX & bounding_box() const
Definition: blobbox.h:215
void set_flow(BlobTextFlowType f)
Definition: colpartition.h:157
int LeftAtY(int y) const
Definition: colpartition.h:340
bool AllowBlob(const BLOBNBOX &blob) const
Definition: tablefind.cpp:510
void InsertLeaderPartition(ColPartition *part)
Definition: tablefind.cpp:418
bool textord_show_tables
Definition: tablefind.cpp:147
void set_global_median_ledding(int ledding)
Definition: tablefind.cpp:770
int space_to_right() const
Definition: colpartition.h:279
const double kSmallTableProjectionThreshold
Definition: tablefind.cpp:109
const int kMaxBlobWidth
Definition: tablefind.cpp:43
void MoveColSegmentsToGrid(ColSegment_LIST *segments, ColSegmentGrid *col_seg_grid)
Definition: tablefind.cpp:1184
void InsertBox(const TBOX &other)
Definition: tablefind.cpp:2165
bool overlap(const TBOX &box) const
Definition: rect.h:345
void MarkPartitionsUsingLocalInformation()
Definition: tablefind.cpp:835
void AddBox(BLOBNBOX *box)
const ICOORD & tright() const
Definition: bbgrid.h:75
void InsertRulingPartition(ColPartition *part)
Definition: tablefind.cpp:426
void Display(ScrollView *window, ScrollView::Color color)
Definition: tablerecog.cpp:290
void GetColumnBlocks(ColPartitionSet **columns, ColSegment_LIST *col_segments)
Definition: tablefind.cpp:531
integer coordinate
Definition: points.h:30
ScrollView::Color BoxColor() const
Definition: tablefind.cpp:2154
const TBOX & bounding_box() const
Definition: tablefind.h:52
int GridY() const
Definition: bbgrid.h:246
BBC * NextRectSearch()
Definition: bbgrid.h:845
PolyBlockType type() const
Definition: colpartition.h:181
ColPartitionGrid clean_part_grid_
Definition: tablefind.h:418
bool textord_tablefind_show_stats
Definition: tablefind.cpp:151
const double kAllowBlobArea
Definition: tablefind.cpp:61
const double kMaxGapInTextPartition
Definition: tablefind.cpp:72
BBC * NextVerticalSearch(bool top_to_bottom)
Definition: bbgrid.h:805
void set_min_height(int height)
Definition: tablerecog.cpp:726
const double kMinOverlapWithTable
Definition: tablefind.cpp:100
void GetTableRegions(ColSegment_LIST *table_columns, ColSegment_LIST *table_regions)
Definition: tablefind.cpp:1331
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:538
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
inT16 width() const
Definition: rect.h:111
void set_num_table_cells(int n)
Definition: tablefind.h:81
void MakeTableBlocks(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb)
Definition: tablefind.cpp:2079
ELISTIZE(AmbigSpec)
#define MIN(x, y)
Definition: ndminx.h:28
const int kMaxBoxesInDataPartition
Definition: tablefind.cpp:69
void SetUniqueMode(bool mode)
Definition: bbgrid.h:254
void set_inside_table_column(bool val)
Definition: colpartition.h:246
void StartVerticalSearch(int xmin, int xmax, int y)
Definition: bbgrid.h:791
bool AllowTextPartition(const ColPartition &part) const
Definition: tablefind.cpp:497
void ClearGridData(void(*free_method)(BBC *))
Definition: bbgrid.h:467
void LocateTables(ColPartitionGrid *grid, ColPartitionSet **columns, WidthCallback *width_cb, const FCOORD &reskew)
Definition: tablefind.cpp:264
int gridwidth() const
Definition: bbgrid.h:66
BlobTextFlowType flow() const
Definition: blobbox.h:280
void DeleteObject(T *object)
Definition: tablefind.cpp:160
void GroupColumnBlocks(ColSegment_LIST *current_segments, ColSegment_LIST *col_segments)
Definition: tablefind.cpp:546
bool null_box() const
Definition: rect.h:46
ColSegmentGrid table_grid_
Definition: tablefind.h:428
Definition: capi.h:97
const int kMaxVerticalSpacing
Definition: tablefind.cpp:41
bool textord_tablefind_show_mark
Definition: tablefind.cpp:149
const double kParagraphEndingPreviousLineRatio
Definition: tablefind.cpp:125
ColSegType type() const
Definition: tablefind.h:94
void Brush(Color color)
Definition: scrollview.cpp:732
void DisplayBoxes(ScrollView *window)
Definition: bbgrid.h:616
void Init(int grid_size, const ICOORD &bottom_left, const ICOORD &top_right)
Definition: tablefind.cpp:186
ColPartition * nearest_neighbor_above() const
Definition: colpartition.h:249
void GetTableColumns(ColSegment_LIST *table_columns)
Definition: tablefind.cpp:1281
const int kSideSpaceMargin
Definition: tablefind.cpp:105
void Absorb(ColPartition *other, WidthCallback *cb)
void set_text_grid(ColPartitionGrid *text)
Definition: tablerecog.cpp:720
void set_left(int x)
Definition: rect.h:71
void set_global_median_blob_width(int width)
Definition: tablefind.cpp:767
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
void set_space_above(int space)
Definition: colpartition.h:264
bool BelongToOneTable(const TBOX &box1, const TBOX &box2)
Definition: tablefind.cpp:1452
const double kMaxTableCellXheight
Definition: tablefind.cpp:84
inT16 bottom() const
Definition: rect.h:61
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)
void set_space_to_left(int space)
Definition: colpartition.h:276
void set_space_to_right(int space)
Definition: colpartition.h:282
ColPartition * nearest_neighbor_below() const
Definition: colpartition.h:255
bool textord_tablefind_recognize_tables
Definition: tablefind.cpp:153
void InsertImagePartition(ColPartition *part)
Definition: tablefind.cpp:429
void DisplayColPartitions(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color text_color, ScrollView::Color table_color)
Definition: tablefind.cpp:1924
void set_left_to_right_language(bool order)
Definition: tablefind.cpp:182
Definition: capi.h:98
bool HasLeaderAdjacent(const ColPartition &part)
Definition: tablefind.cpp:954
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:592
void StartSideSearch(int x, int ymin, int ymax)
Definition: bbgrid.h:749
const double kAllowTextHeight
Definition: tablefind.cpp:52
const int kLargeTableRowCount
Definition: tablefind.cpp:112
bool contains(const FCOORD pt) const
Definition: rect.h:323
void set_nearest_neighbor_below(ColPartition *part)
Definition: colpartition.h:258
bool MatchingStrokeWidth(const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
const double kMaxParagraphEndingLeftSpaceMultiple
Definition: tablefind.cpp:129
void DisplayColPartitionConnections(ScrollView *win, ColPartitionGrid *grid, ScrollView::Color default_color)
Definition: tablefind.cpp:1958
bool HasWideOrNoInterWordGap(ColPartition *part) const
Definition: tablefind.cpp:865
void set_num_text_cells(int n)
Definition: tablefind.h:90
const double kMaxBlobOverlapFactor
Definition: tablefind.cpp:80
bool HLineBelongsToTable(const ColPartition &part, const TBOX &table_box)
Definition: tablefind.cpp:1608
inT16 x() const
access function
Definition: points.h:52
StructuredTable * RecognizeTable(const TBOX &guess_box)
Definition: tablerecog.cpp:736
double overlap_fraction(const TBOX &box) const
Definition: rect.h:378
bool IsInSameColumnAs(const ColPartition &part) const
BBC * NextSideSearch(bool right_to_left)
Definition: bbgrid.h:764
ColPartitionGrid fragmented_text_grid_
Definition: tablefind.h:424
ColPartition * SingletonPartner(bool upper)
int RightAtY(int y) const
Definition: colpartition.h:344
inT16 left() const
Definition: rect.h:68
void Pen(Color color)
Definition: scrollview.cpp:726
bool textord_dump_table_images
Definition: tablefind.cpp:146
int gridheight() const
Definition: tablefind.cpp:392
void SetPartitionType(int resolution, ColPartitionSet *columns)
ColPartitionGrid leader_and_ruling_grid_
Definition: tablefind.h:420
bool VSignificantCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:387
const int kRulingVerticalMargin
Definition: tablefind.cpp:96
void Clear()
Definition: bbgrid.h:458
void InsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:410
void set_nearest_neighbor_above(ColPartition *part)
Definition: colpartition.h:252
inT32 area() const
Definition: rect.h:118
const ICOORD & bleft() const
Definition: tablefind.cpp:395
bool IsHorizontalLine() const
Definition: colpartition.h:453
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
BBC * NextFullSearch()
Definition: bbgrid.h:678
inT16 height() const
Definition: rect.h:104
void InitializePartitions(ColPartitionSet **all_columns)
Definition: tablefind.cpp:587
const int kMinRowsInTable
Definition: tablefind.cpp:115
#define MAX(x, y)
Definition: ndminx.h:24
#define MIN_INT32
Definition: host.h:61
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
ColPartition * ShallowCopy() const
void set_bounding_box(const TBOX &other)
Definition: tablefind.h:72
int gridsize() const
Definition: bbgrid.h:63
void set_right(int x)
Definition: rect.h:78
#define tprintf(...)
Definition: tprintf.h:31
void DisplayColSegmentGrid(ScrollView *win, ColSegmentGrid *grid, ScrollView::Color color)
Definition: tablefind.cpp:1899
#define MAX_INT32
Definition: host.h:53
const double kMinParagraphEndingTextToWhitespaceRatio
Definition: tablefind.cpp:135
const double kLargeTableProjectionThreshold
Definition: tablefind.cpp:110
Definition: points.h:189
void set_global_median_xheight(int xheight)
Definition: tablefind.cpp:764
void SetGlobalSpacings(ColPartitionGrid *grid)
Definition: tablefind.cpp:717
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
Definition: bbgrid.cpp:54
int gridheight() const
Definition: bbgrid.h:69
ColPartition * CopyButDontOwnBlobs()
inT16 top() const
Definition: rect.h:54
const double kAllowBlobWidth
Definition: tablefind.cpp:60
const double kMaxXProjectionGapFactor
Definition: tablefind.cpp:139
void GrowTableToIncludePartials(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
Definition: tablefind.cpp:1550
void GrowTableToIncludeLines(const TBOX &table_box, const TBOX &search_range, TBOX *result_box)
Definition: tablefind.cpp:1578
void rotate_large(const FCOORD &vec)
Definition: rect.cpp:72
const int kMaxColumnHeaderDistance
Definition: tablefind.cpp:88
void set_max_text_height(int height)
Definition: tablerecog.cpp:732
bool IsImageType() const
Definition: colpartition.h:423
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:489
void set_top(int y)
Definition: rect.h:57
bool ConsecutiveBoxes(const TBOX &b1, const TBOX &b2)
Definition: tablefind.cpp:576
const ICOORD & bleft() const
Definition: bbgrid.h:72
void set_space_below(int space)
Definition: colpartition.h:270
void SetVerticalSpacing(ColPartition *part)
Definition: tablefind.cpp:674
void InsertCleanPartitions(ColPartitionGrid *grid, TO_BLOCK *block)
Definition: tablefind.cpp:198
void InsertTextPartition(ColPartition *part)
Definition: tablefind.cpp:402
const ICOORD & tright() const
Definition: tablefind.cpp:398
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: tablefind.cpp:526
double median() const
Definition: statistc.cpp:239
ColPartition * ColumnContaining(int x, int y)
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
BlobTextFlowType flow() const
Definition: colpartition.h:154
void SplitAndInsertFragmentedTextPartition(ColPartition *part)
Definition: tablefind.cpp:444
TBOX intersection(const TBOX &box) const
Definition: rect.cpp:87
const double kMinMaxGapInTextPartition
Definition: tablefind.cpp:76
bool GapInXProjection(int *xprojection, int length)
Definition: tablefind.cpp:1776
void RefinePartitionPartners(bool get_desperate)
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
void set_blob_type(BlobRegionType t)
Definition: colpartition.h:151
const TBOX & bounding_box() const
Definition: colpartition.h:109
void StartRectSearch(const TBOX &rect)
Definition: bbgrid.h:833
void DisplayColSegments(ScrollView *win, ColSegment_LIST *cols, ScrollView::Color color)
Definition: tablefind.cpp:1879
BlobRegionType region_type() const
Definition: blobbox.h:268
bool MatchingSizes(const ColPartition &other) const
ColPartition * SplitAt(int split_x)
#define BOOL_VAR(name, val, comment)
Definition: params.h:280
BlobRegionType blob_type() const
Definition: colpartition.h:148
#define CLISTIZE(CLASSNAME)
Definition: clst.h:913
void SetColumnsType(ColSegment_LIST *col_segments)
Definition: tablefind.cpp:1151
bool major_y_overlap(const TBOX &box) const
Definition: rect.h:429
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:447
const double kTableColumnThreshold
Definition: tablefind.cpp:92
const double kStrokeWidthFractionalTolerance
Definition: tablefind.cpp:143
Definition: statistc.h:33
#define ASSERT_HOST(x)
Definition: errcode.h:84
void RepositionIterator()
Definition: bbgrid.h:895
void IncludeLeftOutColumnHeaders(TBOX *table_box)
Definition: tablefind.cpp:1672
void WriteToPix(const FCOORD &reskew)
Definition: tablefind.cpp:2006
void StartFullSearch()
Definition: bbgrid.h:668
void plot(ScrollView *window, float xorigin, float yorigin, float xscale, float yscale, ScrollView::Color colour) const
Definition: statistc.cpp:585
const int kAdjacentLeaderSearchPadding
Definition: tablefind.cpp:120
static void SetPartitionSpacings(ColPartitionGrid *grid, ColPartitionSet **all_columns)
Definition: tablefind.cpp:594
void UpdateWindow()
Definition: scrollview.cpp:710
const double kStrokeWidthConstantTolerance
Definition: tablefind.cpp:144
inT16 y() const
access_function
Definition: points.h:56
const int kMinBoxesInTextPartition
Definition: tablefind.cpp:66