tesseract  3.05.02
colpartitionset.cpp
Go to the documentation of this file.
1 // File: colpartitionset.cpp
3 // Description: Class to hold a list of ColPartitions of the page that
4 // correspond roughly to columns.
5 // Author: Ray Smith
6 // Created: Thu Aug 14 10:54:01 PDT 2008
7 //
8 // (C) Copyright 2008, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "colpartitionset.h"
26 #include "ndminx.h"
27 #include "workingpartset.h"
28 #include "tablefind.h"
29 
30 namespace tesseract {
31 
32 // Minimum width of a column to be interesting as a multiple of resolution.
33 const double kMinColumnWidth = 2.0 / 3;
34 
36 
37 ColPartitionSet::ColPartitionSet(ColPartition_LIST* partitions) {
38  ColPartition_IT it(&parts_);
39  it.add_list_after(partitions);
40  ComputeCoverage();
41 }
42 
// NOTE(review): the header line of this definition is missing from this
// listing. The visible body adds the single partition `part` to parts_ and
// then recomputes the coverage statistics with ComputeCoverage().
44  ColPartition_IT it(&parts_);
45  it.add_after_then_move(part);
46  ComputeCoverage();
47 }
48 
50 }
51 
52 // Returns the number of columns of good width.
// NOTE(review): the header line of this definition is missing from this
// listing. The visible body counts the entries of parts_ for which
// good_width() is true and returns that count.
54  int num_good_cols = 0;
55  // This is a read-only iteration of the list.
// The const_cast is only there to build an iterator over parts_; nothing
// in the loop modifies the list.
56  ColPartition_IT it(const_cast<ColPartition_LIST*>(&parts_));
57  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
58  if (it.data()->good_width()) ++num_good_cols;
59  }
60  return num_good_cols;
61 }
62 
63 // Return an element of the parts_ list from its index.
65  ColPartition_IT it(&parts_);
66  it.mark_cycle_pt();
67  for (int i = 0; i < index && !it.cycled_list(); ++i, it.forward());
68  if (it.cycled_list())
69  return NULL;
70  return it.data();
71 }
72 
73 // Return the ColPartition that contains the given coords, if any, else NULL.
75  ColPartition_IT it(&parts_);
76  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
77  ColPartition* part = it.data();
78  if (part->ColumnContains(x, y))
79  return part;
80  }
81  return NULL;
82 }
83 
84 // Extract all the parts from the list, relinquishing ownership.
// NOTE(review): the header line of this definition is missing from this
// listing. The visible body removes elements from parts_ one at a time
// until the list is empty; the pointers returned by extract() are neither
// stored nor deleted here.
86  ColPartition_IT it(&parts_);
87  while (!it.empty()) {
88  it.extract();
89  it.forward();
90  }
91 }
92 
93 // Attempt to improve this by adding partitions or expanding partitions.
95  PartSetVector* src_sets) {
96  int set_size = src_sets->size();
97  // Iterate over the provided column sets, as each one may have something
98  // to improve this.
99  for (int i = 0; i < set_size; ++i) {
100  ColPartitionSet* column_set = src_sets->get(i);
101  if (column_set == NULL)
102  continue;
103  // Iterate over the parts in this and column_set, adding bigger or
104  // new parts in column_set to this.
105  ColPartition_IT part_it(&parts_);
106  ASSERT_HOST(!part_it.empty());
107  int prev_right = MIN_INT32;
108  part_it.mark_cycle_pt();
109  ColPartition_IT col_it(&column_set->parts_);
110  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
111  ColPartition* col_part = col_it.data();
112  if (col_part->blob_type() < BRT_UNKNOWN)
113  continue; // Ignore image partitions.
114  int col_left = col_part->left_key();
115  int col_right = col_part->right_key();
116  // Sync-up part_it (in this) so it matches the col_part in column_set.
117  ColPartition* part = part_it.data();
118  while (!part_it.at_last() && part->right_key() < col_left) {
119  prev_right = part->right_key();
120  part_it.forward();
121  part = part_it.data();
122  }
123  int part_left = part->left_key();
124  int part_right = part->right_key();
125  if (part_right < col_left || col_right < part_left) {
126  // There is no overlap so this is a new partition.
127  AddPartition(col_part->ShallowCopy(), &part_it);
128  continue;
129  }
130  // Check the edges of col_part to see if they can improve part.
131  bool part_width_ok = cb->Run(part->KeyWidth(part_left, part_right));
132  if (col_left < part_left && col_left > prev_right) {
133  // The left edge of the column is better and it doesn't overlap,
134  // so we can potentially expand it.
135  int col_box_left = col_part->BoxLeftKey();
136  bool tab_width_ok = cb->Run(part->KeyWidth(col_left, part_right));
137  bool box_width_ok = cb->Run(part->KeyWidth(col_box_left, part_right));
138  if (tab_width_ok || (!part_width_ok )) {
139  // The tab is leaving the good column metric at least as good as
140  // it was before, so use the tab.
141  part->CopyLeftTab(*col_part, false);
142  part->SetColumnGoodness(cb);
143  } else if (col_box_left < part_left &&
144  (box_width_ok || !part_width_ok)) {
145  // The box is leaving the good column metric at least as good as
146  // it was before, so use the box.
147  part->CopyLeftTab(*col_part, true);
148  part->SetColumnGoodness(cb);
149  }
150  part_left = part->left_key();
151  }
152  if (col_right > part_right &&
153  (part_it.at_last() ||
154  part_it.data_relative(1)->left_key() > col_right)) {
155  // The right edge is better, so we can possibly expand it.
156  int col_box_right = col_part->BoxRightKey();
157  bool tab_width_ok = cb->Run(part->KeyWidth(part_left, col_right));
158  bool box_width_ok = cb->Run(part->KeyWidth(part_left, col_box_right));
159  if (tab_width_ok || (!part_width_ok )) {
160  // The tab is leaving the good column metric at least as good as
161  // it was before, so use the tab.
162  part->CopyRightTab(*col_part, false);
163  part->SetColumnGoodness(cb);
164  } else if (col_box_right > part_right &&
165  (box_width_ok || !part_width_ok)) {
166  // The box is leaving the good column metric at least as good as
167  // it was before, so use the box.
168  part->CopyRightTab(*col_part, true);
169  part->SetColumnGoodness(cb);
170  }
171  }
172  }
173  }
174  ComputeCoverage();
175 }
176 
177 // If this set is good enough to represent a new partitioning into columns,
178 // add it to the vector of sets, otherwise delete it.
180  WidthCallback* cb) {
181  bool debug = TabFind::WithinTestRegion(2, bounding_box_.left(),
182  bounding_box_.bottom());
183  if (debug) {
184  tprintf("Considering new column candidate:\n");
185  Print();
186  }
187  if (!LegalColumnCandidate()) {
188  if (debug) {
189  tprintf("Not a legal column candidate:\n");
190  Print();
191  }
192  delete this;
193  return;
194  }
195  for (int i = 0; i < column_sets->size(); ++i) {
196  ColPartitionSet* columns = column_sets->get(i);
197  // In ordering the column set candidates, good_coverage_ is king,
198  // followed by good_column_count_ and then bad_coverage_.
199  bool better = good_coverage_ > columns->good_coverage_;
200  if (good_coverage_ == columns->good_coverage_) {
201  better = good_column_count_ > columns->good_column_count_;
202  if (good_column_count_ == columns->good_column_count_) {
203  better = bad_coverage_ > columns->bad_coverage_;
204  }
205  }
206  if (better) {
207  // The new one is better so add it.
208  if (debug)
209  tprintf("Good one\n");
210  column_sets->insert(this, i);
211  return;
212  }
213  if (columns->CompatibleColumns(false, this, cb)) {
214  if (debug)
215  tprintf("Duplicate\n");
216  delete this;
217  return; // It is not unique.
218  }
219  }
220  if (debug)
221  tprintf("Added to end\n");
222  column_sets->push_back(this);
223 }
224 
225 // Return true if the partitions in other are all compatible with the columns
226 // in this.
228  WidthCallback* cb) {
229  if (debug) {
230  tprintf("CompatibleColumns testing compatibility\n");
231  Print();
232  other->Print();
233  }
234  if (other->parts_.empty()) {
235  if (debug)
236  tprintf("CompatibleColumns true due to empty other\n");
237  return true;
238  }
239  ColPartition_IT it(&other->parts_);
240  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
241  ColPartition* part = it.data();
242  if (part->blob_type() < BRT_UNKNOWN) {
243  if (debug) {
244  tprintf("CompatibleColumns ignoring image partition\n");
245  part->Print();
246  }
247  continue; // Image partitions are irrelevant to column compatibility.
248  }
249  int y = part->MidY();
250  int left = part->bounding_box().left();
251  int right = part->bounding_box().right();
252  ColPartition* left_col = ColumnContaining(left, y);
253  ColPartition* right_col = ColumnContaining(right, y);
254  if (right_col == NULL || left_col == NULL) {
255  if (debug) {
256  tprintf("CompatibleColumns false due to partition edge outside\n");
257  part->Print();
258  }
259  return false; // A partition edge lies outside of all columns
260  }
261  if (right_col != left_col && cb->Run(right - left)) {
262  if (debug) {
263  tprintf("CompatibleColumns false due to good width in multiple cols\n");
264  part->Print();
265  }
266  return false; // Partition with a good width must be in a single column.
267  }
268 
269  ColPartition_IT it2= it;
270  while (!it2.at_last()) {
271  it2.forward();
272  ColPartition* next_part = it2.data();
273  if (!BLOBNBOX::IsTextType(next_part->blob_type()))
274  continue; // Non-text partitions are irrelevant.
275  int next_left = next_part->bounding_box().left();
276  if (next_left == right) {
277  break; // They share the same edge, so one must be a pull-out.
278  }
279  // Search to see if right and next_left fall within a single column.
280  ColPartition* next_left_col = ColumnContaining(next_left, y);
281  if (right_col == next_left_col) {
282  // There is a column break in this column.
283  // This can be due to a figure caption within a column, a pull-out
284  // block, or a simple broken textline that remains to be merged:
285  // all allowed, or a change in column layout: not allowed.
286  // If both partitions are of good width, then it is likely
287  // a change in column layout, otherwise probably an allowed situation.
288  if (part->good_width() && next_part->good_width()) {
289  if (debug) {
290  int next_right = next_part->bounding_box().right();
291  tprintf("CompatibleColumns false due to 2 parts of good width\n");
292  tprintf("part1 %d-%d, part2 %d-%d\n",
293  left, right, next_left, next_right);
294  right_col->Print();
295  }
296  return false;
297  }
298  }
299  break;
300  }
301  }
302  if (debug)
303  tprintf("CompatibleColumns true!\n");
304  return true;
305 }
306 
307 // Returns the total width of all blobs in the part_set that do not lie
308 // within an approved column. Used as a cost measure for using this
309 // column set over another that might be compatible.
311  int total_width = 0;
312  ColPartition_IT it(&part_set->parts_);
313  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
314  ColPartition* part = it.data();
315  if (!BLOBNBOX::IsTextType(part->blob_type())) {
316  continue; // Non-text partitions are irrelevant to column compatibility.
317  }
318  int y = part->MidY();
319  BLOBNBOX_C_IT box_it(part->boxes());
320  for (box_it.mark_cycle_pt(); !box_it.cycled_list(); box_it.forward()) {
321  const TBOX& box = it.data()->bounding_box();
322  // Assume that the whole blob is outside any column iff its x-middle
323  // is outside.
324  int x = (box.left() + box.right()) / 2;
325  ColPartition* col = ColumnContaining(x, y);
326  if (col == NULL)
327  total_width += box.width();
328  }
329  }
330  return total_width;
331 }
332 
333 // Return true if this ColPartitionSet makes a legal column candidate by
334 // having legal individual partitions and non-overlapping adjacent pairs.
// NOTE(review): the header line of this definition is missing from this
// listing. From the visible body the result is false for an empty parts_,
// false if any text-type partition fails IsLegal(), false if any partition's
// right_key() exceeds the left_key() of the partition after it, and
// otherwise true only if at least one text-type partition was seen.
336  ColPartition_IT it(&parts_);
337  if (it.empty())
338  return false;
339  bool any_text_parts = false;
340  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
341  ColPartition* part = it.data();
342  if (BLOBNBOX::IsTextType(part->blob_type())) {
343  if (!part->IsLegal())
344  return false; // Individual partition is illegal.
345  any_text_parts = true;
346  }
// The overlap test applies to every adjacent pair, text or not.
347  if (!it.at_last()) {
348  ColPartition* next_part = it.data_relative(1);
349  if (next_part->left_key() < part->right_key()) {
350  return false;
351  }
352  }
353  }
354  return any_text_parts;
355 }
356 
357 // Return a copy of this. If good_only will only copy the Good ColPartitions.
359  ColPartition_LIST copy_parts;
360  ColPartition_IT src_it(&parts_);
361  ColPartition_IT dest_it(&copy_parts);
362  for (src_it.mark_cycle_pt(); !src_it.cycled_list(); src_it.forward()) {
363  ColPartition* part = src_it.data();
364  if (BLOBNBOX::IsTextType(part->blob_type()) &&
365  (!good_only || part->good_width() || part->good_column()))
366  dest_it.add_after_then_move(part->ShallowCopy());
367  }
368  if (dest_it.empty())
369  return NULL;
370  return new ColPartitionSet(&copy_parts);
371 }
372 
373 // Return the bounding boxes of columns at the given y-range
374 void ColPartitionSet::GetColumnBoxes(int y_bottom, int y_top,
375  ColSegment_LIST *segments) {
376  ColPartition_IT it(&parts_);
377  ColSegment_IT col_it(segments);
378  col_it.move_to_last();
379  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
380  ColPartition* part = it.data();
381  ICOORD bot_left(part->LeftAtY(y_top), y_bottom);
382  ICOORD top_right(part->RightAtY(y_bottom), y_top);
383  ColSegment *col_seg = new ColSegment();
384  col_seg->InsertBox(TBOX(bot_left, top_right));
385  col_it.add_after_then_move(col_seg);
386  }
387 }
388 
389 // Display the edges of the columns at the given y coords.
390 void ColPartitionSet::DisplayColumnEdges(int y_bottom, int y_top,
391  ScrollView* win) {
392 #ifndef GRAPHICS_DISABLED
393  ColPartition_IT it(&parts_);
394  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
395  ColPartition* part = it.data();
396  win->Line(part->LeftAtY(y_top), y_top, part->LeftAtY(y_bottom), y_bottom);
397  win->Line(part->RightAtY(y_top), y_top, part->RightAtY(y_bottom), y_bottom);
398  }
399 #endif // GRAPHICS_DISABLED
400 }
401 
402 // Return the ColumnSpanningType that best explains the columns overlapped
403 // by the given coords(left,right,y), with the given margins.
404 // Also return the first and last column index touched by the coords and
405 // the leftmost spanned column.
406 // Column indices are 2n + 1 for real columns (0 based) and even values
407 // represent the gaps in between columns, with 0 being left of the leftmost.
408 // resolution refers to the ppi resolution of the image.
410  int left, int right,
411  int height, int y,
412  int left_margin,
413  int right_margin,
414  int* first_col,
415  int* last_col,
416  int* first_spanned_col) {
417  *first_col = -1;
418  *last_col = -1;
419  *first_spanned_col = -1;
420  int margin_columns = 0;
421  ColPartition_IT it(&parts_);
422  int col_index = 1;
423  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward(), col_index += 2) {
424  ColPartition* part = it.data();
425  if (part->ColumnContains(left, y) ||
426  (it.at_first() && part->ColumnContains(left + height, y))) {
427  // In the default case, first_col is set, but columns_spanned remains
428  // zero, so first_col will get reset in the first column genuinely
429  // spanned, but we can tell the difference from a noise partition
430  // that touches no column.
431  *first_col = col_index;
432  if (part->ColumnContains(right, y) ||
433  (it.at_last() && part->ColumnContains(right - height, y))) {
434  // Both within a single column.
435  *last_col = col_index;
436  return CST_FLOWING;
437  }
438  if (left_margin <= part->LeftAtY(y)) {
439  // It completely spans this column.
440  *first_spanned_col = col_index;
441  margin_columns = 1;
442  }
443  } else if (part->ColumnContains(right, y) ||
444  (it.at_last() && part->ColumnContains(right - height, y))) {
445  if (*first_col < 0) {
446  // It started in-between.
447  *first_col = col_index - 1;
448  }
449  if (right_margin >= part->RightAtY(y)) {
450  // It completely spans this column.
451  if (margin_columns == 0)
452  *first_spanned_col = col_index;
453  ++margin_columns;
454  }
455  *last_col = col_index;
456  break;
457  } else if (left < part->LeftAtY(y) && right > part->RightAtY(y)) {
458  // Neither left nor right are contained within, so it spans this
459  // column.
460  if (*first_col < 0) {
461  // It started in between the previous column and the current column.
462  *first_col = col_index - 1;
463  }
464  if (margin_columns == 0)
465  *first_spanned_col = col_index;
466  *last_col = col_index;
467  } else if (right < part->LeftAtY(y)) {
468  // We have gone past the end.
469  *last_col = col_index - 1;
470  if (*first_col < 0) {
471  // It must lie completely between columns =>noise.
472  *first_col = col_index - 1;
473  }
474  break;
475  }
476  }
477  if (*first_col < 0)
478  *first_col = col_index - 1; // The last in-between.
479  if (*last_col < 0)
480  *last_col = col_index - 1; // The last in-between.
481  ASSERT_HOST(*first_col >= 0 && *last_col >= 0);
482  ASSERT_HOST(*first_col <= *last_col);
483  if (*first_col == *last_col && right - left < kMinColumnWidth * resolution) {
484  // Neither end was in a column, and it didn't span any, so it lies
485  // entirely between columns, therefore noise.
486  return CST_NOISE;
487  } else if (margin_columns <= 1) {
488  // An exception for headings that stick outside of single-column text.
489  if (margin_columns == 1 && parts_.singleton()) {
490  return CST_HEADING;
491  }
492  // It is a pullout, as left and right were not in the same column, but
493  // it doesn't go to the edge of its start and end.
494  return CST_PULLOUT;
495  }
496  // Its margins went to the edges of first and last columns => heading.
497  return CST_HEADING;
498 }
499 
500 // The column_set has changed. Close down all in-progress WorkingPartSets in
501 // columns that do not match and start new ones for the new columns in this.
502 // As ColPartitions are turned into BLOCKs, the used ones are put in
503 // used_parts, as they still need to be referenced in the grid.
505  const ICOORD& tright,
506  int resolution,
507  ColPartition_LIST* used_parts,
508  WorkingPartSet_LIST* working_set_list) {
509  // Move the input list to a temporary location so we can delete its elements
510  // as we add them to the output working_set.
511  WorkingPartSet_LIST work_src;
512  WorkingPartSet_IT src_it(&work_src);
513  src_it.add_list_after(working_set_list);
514  src_it.move_to_first();
515  WorkingPartSet_IT dest_it(working_set_list);
516  // Completed blocks and to_blocks are accumulated and given to the first new
517  // one whenever we keep a column, or at the end.
518  BLOCK_LIST completed_blocks;
519  TO_BLOCK_LIST to_blocks;
520  WorkingPartSet* first_new_set = NULL;
521  WorkingPartSet* working_set = NULL;
522  ColPartition_IT col_it(&parts_);
523  for (col_it.mark_cycle_pt(); !col_it.cycled_list(); col_it.forward()) {
524  ColPartition* column = col_it.data();
525  // Any existing column to the left of column is completed.
526  while (!src_it.empty() &&
527  ((working_set = src_it.data())->column() == NULL ||
528  working_set->column()->right_key() <= column->left_key())) {
529  src_it.extract();
530  working_set->ExtractCompletedBlocks(bleft, tright, resolution,
531  used_parts, &completed_blocks,
532  &to_blocks);
533  delete working_set;
534  src_it.forward();
535  }
536  // Make a new between-column WorkingSet for before the current column.
537  working_set = new WorkingPartSet(NULL);
538  dest_it.add_after_then_move(working_set);
539  if (first_new_set == NULL)
540  first_new_set = working_set;
541  // A matching column gets to stay, and first_new_set gets all the
542  // completed_sets.
543  working_set = src_it.empty() ? NULL : src_it.data();
544  if (working_set != NULL &&
545  working_set->column()->MatchingColumns(*column)) {
546  working_set->set_column(column);
547  dest_it.add_after_then_move(src_it.extract());
548  src_it.forward();
549  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
550  first_new_set = NULL;
551  } else {
552  // Just make a new working set for the current column.
553  working_set = new WorkingPartSet(column);
554  dest_it.add_after_then_move(working_set);
555  }
556  }
557  // Complete any remaining src working sets.
558  while (!src_it.empty()) {
559  working_set = src_it.extract();
560  working_set->ExtractCompletedBlocks(bleft, tright, resolution,
561  used_parts, &completed_blocks,
562  &to_blocks);
563  delete working_set;
564  src_it.forward();
565  }
566  // Make a new between-column WorkingSet for after the last column.
567  working_set = new WorkingPartSet(NULL);
568  dest_it.add_after_then_move(working_set);
569  if (first_new_set == NULL)
570  first_new_set = working_set;
571  // The first_new_set now gets any accumulated completed_parts/blocks.
572  first_new_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
573 }
574 
575 // Accumulate the widths and gaps into the given variables.
577  int* width_samples,
578  int* total_gap,
579  int* gap_samples) {
580  ColPartition_IT it(&parts_);
581  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
582  ColPartition* part = it.data();
583  *total_width += part->ColumnWidth();
584  ++*width_samples;
585  if (!it.at_last()) {
586  ColPartition* next_part = it.data_relative(1);
587  int gap = part->KeyWidth(part->right_key(), next_part->left_key());
588  *total_gap += gap;
589  ++*gap_samples;
590  }
591  }
592 }
593 
594 // Provide debug output for this ColPartitionSet and all the ColPartitions.
// NOTE(review): the header line of this definition is missing from this
// listing. The visible body prints one summary line (number of parts, good
// column count, good+bad coverage and the bounding box corners) and then
// calls Print() on every partition in parts_.
596  ColPartition_IT it(&parts_);
597  tprintf("Partition set of %d parts, %d good, coverage=%d+%d"
598  " (%d,%d)->(%d,%d)\n",
599  it.length(), good_column_count_, good_coverage_, bad_coverage_,
600  bounding_box_.left(), bounding_box_.bottom(),
601  bounding_box_.right(), bounding_box_.top());
602  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
603  ColPartition* part = it.data();
604  part->Print();
605  }
606 }
607 
608 // PRIVATE CODE.
609 
610 // Add the given partition to the list in the appropriate place.
611 void ColPartitionSet::AddPartition(ColPartition* new_part,
612  ColPartition_IT* it) {
613  AddPartitionCoverageAndBox(*new_part);
614  int new_right = new_part->right_key();
615  if (it->data()->left_key() >= new_right)
616  it->add_before_stay_put(new_part);
617  else
618  it->add_after_stay_put(new_part);
619 }
620 
621 // Compute the coverage and good column count. Coverage is the amount of the
622 // width of the page (in pixels) that is covered by ColPartitions, which are
623 // used to provide candidate column layouts.
624 // Coverage is split into good and bad. Good coverage is provided by
625 // ColPartitions of a frequent width (according to the callback function
626 // provided by TabFinder::WidthCB, which accesses stored statistics on the
627 // widths of ColParititions) and bad coverage is provided by all other
628 // ColPartitions, even if they have tab vectors at both sides. Thus:
629 // |-----------------------------------------------------------------|
630 // | Double width heading |
631 // |-----------------------------------------------------------------|
632 // |-------------------------------| |-------------------------------|
633 // | Common width ColParition | | Common width ColPartition |
634 // |-------------------------------| |-------------------------------|
635 // the layout with two common-width columns has better coverage than the
636 // double width heading, because the coverage is "good," even though less in
637 // total coverage than the heading, because the heading coverage is "bad."
638 void ColPartitionSet::ComputeCoverage() {
639  // Count the number of good columns and sum their width.
640  ColPartition_IT it(&parts_);
641  good_column_count_ = 0;
642  good_coverage_ = 0;
643  bad_coverage_ = 0;
644  bounding_box_ = TBOX();
645  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
646  ColPartition* part = it.data();
647  AddPartitionCoverageAndBox(*part);
648  }
649 }
650 
651 // Adds the coverage, column count and box for a single partition,
652 // without adding it to the list. (Helper factored from ComputeCoverage.)
653 void ColPartitionSet::AddPartitionCoverageAndBox(const ColPartition& part) {
654  bounding_box_ += part.bounding_box();
655  int coverage = part.ColumnWidth();
656  if (part.good_width()) {
657  good_coverage_ += coverage;
658  good_column_count_ += 2;
659  } else {
660  if (part.blob_type() < BRT_UNKNOWN)
661  coverage /= 2;
662  if (part.good_column())
663  ++good_column_count_;
664  bad_coverage_ += coverage;
665  }
666 }
667 
668 } // namespace tesseract.
static bool IsTextType(BlobRegionType type)
Definition: blobbox.h:403
int LeftAtY(int y) const
Definition: colpartition.h:340
void CopyRightTab(const ColPartition &src, bool take_box)
bool CompatibleColumns(bool debug, ColPartitionSet *other, WidthCallback *cb)
T & get(int index) const
integer coordinate
Definition: points.h:30
static bool WithinTestRegion(int detail_level, int x, int y)
virtual R Run(A1)=0
void insert(T t, int index)
int UnmatchedWidth(ColPartitionSet *part_set)
ColPartition * column() const
void Line(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:538
ColumnSpanningType SpanningType(int resolution, int left, int right, int height, int y, int left_margin, int right_margin, int *first_col, int *last_col, int *first_spanned_col)
inT16 width() const
Definition: rect.h:111
ELISTIZE(AmbigSpec)
void ImproveColumnCandidate(WidthCallback *cb, PartSetVector *src_sets)
void SetColumnGoodness(WidthCallback *cb)
bool ColumnContains(int x, int y) const
Definition: colpartition.h:353
int push_back(T object)
void AccumulateColumnWidthsAndGaps(int *total_width, int *width_samples, int *total_gap, int *gap_samples)
inT16 bottom() const
Definition: rect.h:61
void GetColumnBoxes(int y_bottom, int y_top, ColSegment_LIST *segments)
void AddToColumnSetsIfUnique(PartSetVector *column_sets, WidthCallback *cb)
void set_column(ColPartition *col)
void InsertCompletedBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
void DisplayColumnEdges(int y_bottom, int y_top, ScrollView *win)
int RightAtY(int y) const
Definition: colpartition.h:344
void CopyLeftTab(const ColPartition &src, bool take_box)
inT16 left() const
Definition: rect.h:68
const double kMinColumnWidth
#define MIN_INT32
Definition: host.h:61
ColPartition * ShallowCopy() const
#define tprintf(...)
Definition: tprintf.h:31
ColPartition * GetColumnByIndex(int index)
void ExtractCompletedBlocks(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
int size() const
Definition: genericvector.h:72
void ChangeWorkColumns(const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
bool MatchingColumns(const ColPartition &other) const
inT16 top() const
Definition: rect.h:54
bool good_column() const
Definition: colpartition.h:166
ColPartitionSet * Copy(bool good_only)
ColPartition * ColumnContaining(int x, int y)
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187
const TBOX & bounding_box() const
Definition: colpartition.h:109
BlobRegionType blob_type() const
Definition: colpartition.h:148
#define ASSERT_HOST(x)
Definition: errcode.h:84