tesseract  3.05.02
tesseract::TabFind Class Reference

#include <tabfind.h>

Inheritance diagram for tesseract::TabFind:
tesseract::AlignedBlob tesseract::BlobGrid tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > tesseract::GridBase tesseract::ColumnFinder

Public Member Functions

 TabFind (int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines, int vertical_x, int vertical_y, int resolution)
 
virtual ~TabFind ()
 
void InsertBlobsToGrid (bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
bool InsertBlob (bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
 
void SetBlockRuleEdges (TO_BLOCK *block)
 
void SetBlobRuleEdges (BLOBNBOX_LIST *blobs)
 
int GutterWidth (int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables, int max_gutter_width, int *required_shift)
 
void GutterWidthAndNeighbourGap (int tab_x, int mean_height, int max_gutter, bool left, BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap)
 
int RightEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
int LeftEdgeForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * RightTabForBox (const TBOX &box, bool crossing, bool extended)
 
TabVector * LeftTabForBox (const TBOX &box, bool crossing, bool extended)
 
bool CommonWidth (int width)
 
WidthCallback * WidthCB ()
 
const ICOORD & image_origin () const
 
- Public Member Functions inherited from tesseract::AlignedBlob
 AlignedBlob (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~AlignedBlob ()
 
ScrollView * DisplayTabs (const char *window_name, ScrollView *tab_win)
 
TabVector * FindVerticalAlignment (AlignedBlobParams align_params, BLOBNBOX *bbox, int *vertical_x, int *vertical_y)
 
- Public Member Functions inherited from tesseract::BlobGrid
 BlobGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BlobGrid ()
 
void InsertBlobList (BLOBNBOX_LIST *blobs)
 
- Public Member Functions inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
 BBGrid ()
 
 BBGrid (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~BBGrid ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
void Clear ()
 
void ClearGridData (void(*free_method)(BLOBNBOX *))
 
void InsertBBox (bool h_spread, bool v_spread, BLOBNBOX *bbox)
 
void InsertPixPtBBox (int left, int bottom, Pix *pix, BLOBNBOX *bbox)
 
void RemoveBBox (BLOBNBOX *bbox)
 
bool RectangleEmpty (const TBOX &rect)
 
IntGrid * CountCellElements ()
 
ScrollView * MakeWindow (int x, int y, const char *window_name)
 
void DisplayBoxes (ScrollView *window)
 
void AssertNoDuplicates ()
 
virtual void HandleClick (int x, int y)
 
- Public Member Functions inherited from tesseract::GridBase
 GridBase ()
 
 GridBase (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
virtual ~GridBase ()
 
void Init (int gridsize, const ICOORD &bleft, const ICOORD &tright)
 
int gridsize () const
 
int gridwidth () const
 
int gridheight () const
 
const ICOORD & bleft () const
 
const ICOORD & tright () const
 
void GridCoords (int x, int y, int *grid_x, int *grid_y) const
 
void ClipGridCoords (int *x, int *y) const
 

Static Public Member Functions

static bool DifferentSizes (int size1, int size2)
 
static bool VeryDifferentSizes (int size1, int size2)
 
- Static Public Member Functions inherited from tesseract::AlignedBlob
static bool WithinTestRegion (int detail_level, int x, int y)
 
static void IncrementDebugPix ()
 
static const STRING & textord_debug_pix ()
 

Protected Member Functions

TabVector_LIST * vectors ()
 
TabVector_LIST * dead_vectors ()
 
bool FindTabVectors (TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, int min_gutter_width, double tabfind_aligned_gap_fraction, ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew)
 
void DontFindTabVectors (BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew, FCOORD *reskew)
 
void TidyBlobs (TO_BLOCK *block)
 
void SetupTabSearch (int x, int y, int *min_key, int *max_key)
 
ScrollView * DisplayTabVectors (ScrollView *tab_win)
 
ScrollView * FindInitialTabVectors (BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
 
void ResetForVerticalText (const FCOORD &rotate, const FCOORD &rerotate, TabVector_LIST *horizontal_lines, int *min_gutter_width)
 
void Reset ()
 
void ReflectInYAxis ()
 

Static Protected Member Functions

static void RotateBlobList (const FCOORD &rotation, BLOBNBOX_LIST *blobs)
 

Protected Attributes

ICOORD vertical_skew_
 
int resolution_
 
- Protected Attributes inherited from tesseract::BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT >
BLOBNBOX_CLIST * grid_
 
- Protected Attributes inherited from tesseract::GridBase
int gridsize_
 
int gridwidth_
 
int gridheight_
 
int gridbuckets_
 
ICOORD bleft_
 
ICOORD tright_
 

Detailed Description

The TabFind class contains code to find tab-stops and maintain the vectors_ list of tab vectors. It also provides an interface, used by multiple subclasses, for finding neighbouring blobs in the grid of BLOBNBOXes. Searching is a complex operation because of the need to enforce rule/separator lines and (when available) tabstop boundaries; since this class holds the list of TabVectors, it is the one that provides the search functions.

Definition at line 53 of file tabfind.h.

Constructor & Destructor Documentation

◆ TabFind()

tesseract::TabFind::TabFind ( int  gridsize,
const ICOORD &  bleft,
const ICOORD &  tright,
TabVector_LIST *  vlines,
int  vertical_x,
int  vertical_y,
int  resolution 
)

Definition at line 65 of file tabfind.cpp.

69  resolution_(resolution),
70  image_origin_(0, tright.y() - 1) {
71  width_cb_ = NULL;
72  v_it_.set_to_list(&vectors_);
73  v_it_.add_list_after(vlines);
74  SetVerticalSkewAndParellelize(vertical_x, vertical_y);
76 }
const ICOORD & tright() const
Definition: bbgrid.h:75
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
Definition: tesscallback.h:116
bool CommonWidth(int width)
Definition: tabfind.cpp:395
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright)
inT16 y() const
access_function
Definition: points.h:56

◆ ~TabFind()

tesseract::TabFind::~TabFind ( )
virtual

Definition at line 78 of file tabfind.cpp.

78  {
79  if (width_cb_ != NULL)
80  delete width_cb_;
81 }

Member Function Documentation

◆ CommonWidth()

bool tesseract::TabFind::CommonWidth ( int  width)

Return true if the given width is close to one of the common widths in column_widths_.

Definition at line 395 of file tabfind.cpp.

395  {
396  width /= kColumnWidthFactor;
397  ICOORDELT_IT it(&column_widths_);
398  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
399  ICOORDELT* w = it.data();
400  if (w->x() - 1 <= width && width <= w->y() + 1)
401  return true;
402  }
403  return false;
404 }
const int kColumnWidthFactor
Definition: tabfind.h:42
inT16 x() const
access function
Definition: points.h:52
inT16 y() const
access_function
Definition: points.h:56

◆ dead_vectors()

TabVector_LIST* tesseract::TabFind::dead_vectors ( )
inline protected

Definition at line 176 of file tabfind.h.

176  {
177  return &dead_vectors_;
178  }

◆ DifferentSizes()

bool tesseract::TabFind::DifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 2 different.

Definition at line 408 of file tabfind.cpp.

408  {
409  return size1 > size2 * 2 || size2 > size1 * 2;
410 }

◆ DisplayTabVectors()

ScrollView * tesseract::TabFind::DisplayTabVectors ( ScrollView *  tab_win)
protected

Display the tab vectors found in this grid.

Definition at line 503 of file tabfind.cpp.

503  {
504 #ifndef GRAPHICS_DISABLED
505  // For every vector, display it.
506  TabVector_IT it(&vectors_);
507  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
508  TabVector* vector = it.data();
509  vector->Display(tab_win);
510  }
511  tab_win->Update();
512 #endif
513  return tab_win;
514 }
static void Update()
Definition: scrollview.cpp:715

◆ DontFindTabVectors()

void tesseract::TabFind::DontFindTabVectors ( BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Definition at line 458 of file tabfind.cpp.

459  {
460  InsertBlobsToGrid(false, false, image_blobs, this);
461  InsertBlobsToGrid(true, false, &block->blobs, this);
462  deskew->set_x(1.0f);
463  deskew->set_y(0.0f);
464  reskew->set_x(1.0f);
465  reskew->set_y(0.0f);
466 }
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void set_x(float xin)
rewrite function
Definition: points.h:216
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:92
void set_y(float yin)
rewrite function
Definition: points.h:220

◆ FindInitialTabVectors()

ScrollView * tesseract::TabFind::FindInitialTabVectors ( BLOBNBOX_LIST *  image_blobs,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
TO_BLOCK *  block 
)
protected

Definition at line 520 of file tabfind.cpp.

523  {
524  if (textord_tabfind_show_initialtabs) {
525  ScrollView* line_win = MakeWindow(0, 0, "VerticalLines");
526  line_win = DisplayTabVectors(line_win);
527  }
528  // Prepare the grid.
529  if (image_blobs != NULL)
530  InsertBlobsToGrid(true, false, image_blobs, this);
531  InsertBlobsToGrid(true, false, &block->blobs, this);
532  ScrollView* initial_win = FindTabBoxes(min_gutter_width,
533  tabfind_aligned_gap_fraction);
534  FindAllTabVectors(min_gutter_width);
535 
537  SortVectors();
538  EvaluateTabs();
539  if (textord_tabfind_show_initialtabs && initial_win != NULL)
540  initial_win = DisplayTabVectors(initial_win);
541  MarkVerticalText();
542  return initial_win;
543 }
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:503
ICOORD vertical_skew_
Definition: tabfind.h:367
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:592
void InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:92
bool textord_tabfind_show_initialtabs
Definition: tabfind.cpp:62

◆ FindTabVectors()

bool tesseract::TabFind::FindTabVectors ( TabVector_LIST *  hlines,
BLOBNBOX_LIST *  image_blobs,
TO_BLOCK *  block,
int  min_gutter_width,
double  tabfind_aligned_gap_fraction,
ColPartitionGrid *  part_grid,
FCOORD *  deskew,
FCOORD *  reskew 
)
protected

Top-level function to find TabVectors in an input page block. Returns false if the detected skew angle is impossible. Applies the detected skew angle to deskew the tabs, blobs and part_grid. tabfind_aligned_gap_fraction should be the value of the parameter textord_tabfind_aligned_gap_fraction.

Definition at line 423 of file tabfind.cpp.

428  {
429  ScrollView* tab_win = FindInitialTabVectors(image_blobs, min_gutter_width,
430  tabfind_aligned_gap_fraction,
431  block);
432  ComputeColumnWidths(tab_win, part_grid);
434  SortVectors();
435  CleanupTabs();
436  if (!Deskew(hlines, image_blobs, block, deskew, reskew))
437  return false; // Skew angle is too large.
438  part_grid->Deskew(*deskew);
439  ApplyTabConstraints();
440  #ifndef GRAPHICS_DISABLED
441  if (textord_tabfind_show_finaltabs) {
442  tab_win = MakeWindow(640, 50, "FinalTabs");
443  if (textord_debug_images) {
444  tab_win->Image(AlignedBlob::textord_debug_pix().string(),
445  image_origin_.x(), image_origin_.y());
446  } else {
447  DisplayBoxes(tab_win);
448  DisplayTabs("FinalTabs", tab_win);
449  }
450  tab_win = DisplayTabVectors(tab_win);
451  }
452  #endif // GRAPHICS_DISABLED
453  return true;
454 }
void Image(struct Pix *image, int x_pos, int y_pos)
Definition: scrollview.cpp:773
bool textord_debug_images
Definition: alignedblob.cpp:33
ScrollView * DisplayTabVectors(ScrollView *tab_win)
Definition: tabfind.cpp:503
ICOORD vertical_skew_
Definition: tabfind.h:367
static const STRING & textord_debug_pix()
Definition: alignedblob.h:112
bool textord_tabfind_show_finaltabs
Definition: tabfind.cpp:63
static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, BlobGrid *grid)
Definition: tabvector.cpp:361
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:592
inT16 x() const
access function
Definition: points.h:52
ScrollView * FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width, double tabfind_aligned_gap_fraction, TO_BLOCK *block)
Definition: tabfind.cpp:520
ScrollView * DisplayTabs(const char *window_name, ScrollView *tab_win)
inT16 y() const
access_function
Definition: points.h:56

◆ GutterWidth()

int tesseract::TabFind::GutterWidth ( int  bottom_y,
int  top_y,
const TabVector &  v,
bool  ignore_unmergeables,
int  max_gutter_width,
int *  required_shift 
)

Definition at line 162 of file tabfind.cpp.

164  {
165  bool right_to_left = v.IsLeftTab();
166  int bottom_x = v.XAtY(bottom_y);
167  int top_x = v.XAtY(top_y);
168  int start_x = right_to_left ? MAX(top_x, bottom_x) : MIN(top_x, bottom_x);
169  BlobGridSearch sidesearch(this);
170  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
171  int min_gap = max_gutter_width;
172  *required_shift = 0;
173  BLOBNBOX* blob = NULL;
174  while ((blob = sidesearch.NextSideSearch(right_to_left)) != NULL) {
175  const TBOX& box = blob->bounding_box();
176  if (box.bottom() >= top_y || box.top() <= bottom_y)
177  continue; // Doesn't overlap enough.
178  if (box.height() >= gridsize() * 2 &&
179  box.height() > box.width() * kLineFragmentAspectRatio) {
180  // Skip likely separator line residue.
181  continue;
182  }
183  if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type()))
184  continue; // Skip non-text if required.
185  int mid_y = (box.bottom() + box.top()) / 2;
186  // We use the x at the mid-y so that the required_shift guarantees
187  // to clear all the blobs on the tab-stop. If we use the min/max
188  // of x at top/bottom of the blob, then exactness would be required,
189  // which is not a good thing.
190  int tab_x = v.XAtY(mid_y);
191  int gap;
192  if (right_to_left) {
193  gap = tab_x - box.right();
194  if (gap < 0 && box.left() - tab_x < *required_shift)
195  *required_shift = box.left() - tab_x;
196  } else {
197  gap = box.left() - tab_x;
198  if (gap < 0 && box.right() - tab_x > *required_shift)
199  *required_shift = box.right() - tab_x;
200  }
201  if (gap > 0 && gap < min_gap)
202  min_gap = gap;
203  }
204  // Result may be negative, in which case, this is a really bad tabstop.
205  return min_gap - abs(*required_shift);
206 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
const double kLineFragmentAspectRatio
Definition: tabfind.cpp:54
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:415
inT16 width() const
Definition: rect.h:111
#define MIN(x, y)
Definition: ndminx.h:28
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
Definition: blobgrid.h:31
inT16 height() const
Definition: rect.h:104
#define MAX(x, y)
Definition: ndminx.h:24
int gridsize() const
Definition: bbgrid.h:63
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
BlobRegionType region_type() const
Definition: blobbox.h:268

◆ GutterWidthAndNeighbourGap()

void tesseract::TabFind::GutterWidthAndNeighbourGap ( int  tab_x,
int  mean_height,
int  max_gutter,
bool  left,
BLOBNBOX *  bbox,
int *  gutter_width,
int *  neighbour_gap 
)

Find the gutter width and distance to inner neighbour for the given blob.

Definition at line 209 of file tabfind.cpp.

212  {
213  const TBOX& box = bbox->bounding_box();
214  // The gutter and internal sides of the box.
215  int gutter_x = left ? box.left() : box.right();
216  int internal_x = left ? box.right() : box.left();
217  // On ragged edges, the gutter side of the box is away from the tabstop.
218  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
219  *gutter_width = max_gutter;
220  // If the box is away from the tabstop, we need to increase
221  // the allowed gutter width.
222  if (tab_gap > 0)
223  *gutter_width += tab_gap;
224  bool debug = WithinTestRegion(2, box.left(), box.bottom());
225  if (debug)
226  tprintf("Looking in gutter\n");
227  // Find the nearest blob on the outside of the column.
228  BLOBNBOX* gutter_bbox = AdjacentBlob(bbox, left,
229  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
230  *gutter_width, box.top(), box.bottom());
231  if (gutter_bbox != NULL) {
232  const TBOX& gutter_box = gutter_bbox->bounding_box();
233  *gutter_width = left ? tab_x - gutter_box.right()
234  : gutter_box.left() - tab_x;
235  }
236  if (*gutter_width >= max_gutter) {
237  // If there is no box because a tab was in the way, get the tab coord.
238  TBOX gutter_box(box);
239  if (left) {
240  gutter_box.set_left(tab_x - max_gutter - 1);
241  gutter_box.set_right(tab_x - max_gutter);
242  int tab_gutter = RightEdgeForBox(gutter_box, true, false);
243  if (tab_gutter < tab_x - 1)
244  *gutter_width = tab_x - tab_gutter;
245  } else {
246  gutter_box.set_left(tab_x + max_gutter);
247  gutter_box.set_right(tab_x + max_gutter + 1);
248  int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
249  if (tab_gutter > tab_x + 1)
250  *gutter_width = tab_gutter - tab_x;
251  }
252  }
253  if (*gutter_width > max_gutter)
254  *gutter_width = max_gutter;
255  // Now look for a neighbour on the inside.
256  if (debug)
257  tprintf("Looking for neighbour\n");
258  BLOBNBOX* neighbour = AdjacentBlob(bbox, !left,
259  bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
260  *gutter_width, box.top(), box.bottom());
261  int neighbour_edge = left ? RightEdgeForBox(box, true, false)
262  : LeftEdgeForBox(box, true, false);
263  if (neighbour != NULL) {
264  const TBOX& n_box = neighbour->bounding_box();
265  if (debug) {
266  tprintf("Found neighbour:");
267  n_box.print();
268  }
269  if (left && n_box.left() < neighbour_edge)
270  neighbour_edge = n_box.left();
271  else if (!left && n_box.right() > neighbour_edge)
272  neighbour_edge = n_box.right();
273  }
274  *neighbour_gap = left ? neighbour_edge - internal_x
275  : internal_x - neighbour_edge;
276 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
static bool WithinTestRegion(int detail_level, int x, int y)
BlobTextFlowType flow() const
Definition: blobbox.h:280
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
void set_left(int x)
Definition: rect.h:71
inT16 bottom() const
Definition: rect.h:61
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
inT16 left() const
Definition: rect.h:68
void print() const
Definition: rect.h:270
void set_right(int x)
Definition: rect.h:78
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ image_origin()

const ICOORD& tesseract::TabFind::image_origin ( ) const
inline

Return the coords at which to draw the image backdrop.

Definition at line 165 of file tabfind.h.

165  {
166  return image_origin_;
167  }

◆ InsertBlob()

bool tesseract::TabFind::InsertBlob ( bool  h_spread,
bool  v_spread,
BLOBNBOX *  blob,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a single blob into the given grid (not necessarily this). If h_spread, then all cells covered horizontally by the box are used, otherwise, just the bottom-left. Similarly for v_spread. A side effect is that the left and right rule edges of the blob are set according to the tab vectors in this (not grid).

Definition at line 119 of file tabfind.cpp.

121  {
122  TBOX box = blob->bounding_box();
123  blob->set_left_rule(LeftEdgeForBox(box, false, false));
124  blob->set_right_rule(RightEdgeForBox(box, false, false));
125  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
126  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
127  if (blob->joined_to_prev())
128  return false;
129  grid->InsertBBox(h_spread, v_spread, blob);
130  return true;
131 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
void set_right_rule(int new_right)
Definition: blobbox.h:307
bool joined_to_prev() const
Definition: blobbox.h:241
void set_left_rule(int new_left)
Definition: blobbox.h:301
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
Definition: bbgrid.h:489
Definition: rect.h:30
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:319
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:313

◆ InsertBlobsToGrid()

void tesseract::TabFind::InsertBlobsToGrid ( bool  h_spread,
bool  v_spread,
BLOBNBOX_LIST *  blobs,
BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *  grid 
)

Insert a list of blobs into the given grid (not necessarily this). See InsertBlob for the other arguments. It would seem to make more sense to swap this and grid, but this way around allows grid to not be derived from TabFind, eg a ColPartitionGrid, while the grid that provides the tab stops(this) has to be derived from TabFind.

Definition at line 92 of file tabfind.cpp.

95  {
96  BLOBNBOX_IT blob_it(blobs);
97  int b_count = 0;
98  int reject_count = 0;
99  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
100  BLOBNBOX* blob = blob_it.data();
101 // if (InsertBlob(true, true, blob, grid)) {
102  if (InsertBlob(h_spread, v_spread, blob, grid)) {
103  ++b_count;
104  } else {
105  ++reject_count;
106  }
107  }
108  if (textord_debug_tabfind) {
109  tprintf("Inserted %d blobs into grid, %d rejected.\n",
110  b_count, reject_count);
111  }
112 }
bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob, BBGrid< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > *grid)
Definition: tabfind.cpp:119
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31

◆ LeftEdgeForBox()

int tesseract::TabFind::LeftEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightEdgeForBox, but finds the left Edge instead.

Definition at line 287 of file tabfind.cpp.

287  {
288  TabVector* v = LeftTabForBox(box, crossing, extended);
289  return v == NULL ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
290 }
TabVector * LeftTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:349
inT16 bottom() const
Definition: rect.h:61
inT16 x() const
access function
Definition: points.h:52
inT16 top() const
Definition: rect.h:54

◆ LeftTabForBox()

TabVector * tesseract::TabFind::LeftTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

As RightTabForBox, but finds the left TabVector instead.

Definition at line 349 of file tabfind.cpp.

350  {
351  if (v_it_.empty())
352  return NULL;
353  int top_y = box.top();
354  int bottom_y = box.bottom();
355  int mid_y = (top_y + bottom_y) / 2;
356  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
357  int min_key, max_key;
358  SetupTabSearch(left, mid_y, &min_key, &max_key);
359  // Position the iterator at the last TabVector with sort_key <= max_key.
360  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key)
361  v_it_.forward();
362  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
363  v_it_.backward();
364  }
365  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
366  TabVector* best_v = NULL;
367  int best_x = -1;
368  int key_limit = -1;
369  do {
370  TabVector* v = v_it_.data();
371  int x = v->XAtY(mid_y);
372  if (x <= left &&
373  (v->VOverlap(top_y, bottom_y) > 0 ||
374  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
375  if (best_v == NULL || x > best_x) {
376  best_v = v;
377  best_x = x;
378  // We can guarantee that no better vector can be found if the
379  // sort key is less than that of the best by max_key - min_key.
380  key_limit = v->sort_key() - (max_key - min_key);
381  }
382  }
383  // Break when the search is done to avoid wrapping the iterator and
384  // thereby potentially slowing the next search.
385  if (v_it_.at_first() ||
386  (best_v != NULL && v->sort_key() < key_limit))
387  break; // Prevent restarting list for next call.
388  v_it_.backward();
389  } while (!v_it_.at_last());
390  return best_v;
391 }
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
inT16 top() const
Definition: rect.h:54
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:496
inT16 right() const
Definition: rect.h:75

◆ ReflectInYAxis()

void tesseract::TabFind::ReflectInYAxis ( )
protected

Definition at line 1388 of file tabfind.cpp.

1388  {
1389  TabVector_LIST temp_list;
1390  TabVector_IT temp_it(&temp_list);
1391  v_it_.move_to_first();
1392  // The TabVector list only contains vertical lines, but they need to be
1393  // reflected and the list needs to be reversed, so they are still in
1394  // sort_key order.
1395  while (!v_it_.empty()) {
1396  TabVector* v = v_it_.extract();
1397  v_it_.forward();
1398  v->ReflectInYAxis();
1399  temp_it.add_before_then_move(v);
1400  }
1401  v_it_.add_list_after(&temp_list);
1402  v_it_.move_to_first();
1403  // Reset this grid with reflected bounding boxes.
1404  TBOX grid_box(bleft(), tright());
1405  int tmp = grid_box.left();
1406  grid_box.set_left(-grid_box.right());
1407  grid_box.set_right(-tmp);
1408  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1409 }
const ICOORD & tright() const
Definition: bbgrid.h:75
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
Definition: rect.h:30
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:447

◆ Reset()

void tesseract::TabFind::Reset ( )
protected

Definition at line 1377 of file tabfind.cpp.

1377  {
1378  v_it_.move_to_first();
1379  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1380  if (!v_it_.data()->IsSeparator())
1381  delete v_it_.extract();
1382  }
1383  Clear();
1384 }

◆ ResetForVerticalText()

void tesseract::TabFind::ResetForVerticalText ( const FCOORD &  rotate,
const FCOORD &  rerotate,
TabVector_LIST *  horizontal_lines,
int *  min_gutter_width 
)
protected

Definition at line 1332 of file tabfind.cpp.

1334  {
1335  // Rotate the horizontal and vertical vectors and swap them over.
1336  // Only the separators are kept and rotated; other tabs are used
1337  // to estimate the gutter width then thrown away.
1338  TabVector_LIST ex_verticals;
1339  TabVector_IT ex_v_it(&ex_verticals);
1340  TabVector_LIST vlines;
1341  TabVector_IT v_it(&vlines);
1342  while (!v_it_.empty()) {
1343  TabVector* v = v_it_.extract();
1344  if (v->IsSeparator()) {
1345  v->Rotate(rotate);
1346  ex_v_it.add_after_then_move(v);
1347  } else {
1348  v_it.add_after_then_move(v);
1349  }
1350  v_it_.forward();
1351  }
1352 
1353  // Adjust the min gutter width for better tabbox selection
1354  // in 2nd call to FindInitialTabVectors().
1355  int median_gutter = FindMedianGutterWidth(&vlines);
1356  if (median_gutter > *min_gutter_width)
1357  *min_gutter_width = median_gutter;
1358 
1359  TabVector_IT h_it(horizontal_lines);
1360  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1361  TabVector* h = h_it.data();
1362  h->Rotate(rotate);
1363  }
1364  v_it_.add_list_after(horizontal_lines);
1365  v_it_.move_to_first();
1366  h_it.set_to_list(horizontal_lines);
1367  h_it.add_list_after(&ex_verticals);
1368 
1369  // Rebuild the grid to the new size.
1370  TBOX grid_box(bleft(), tright());
1371  grid_box.rotate_large(rotate);
1372  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1373 }
const ICOORD & tright() const
Definition: bbgrid.h:75
int gridsize() const
Definition: bbgrid.h:63
const ICOORD & bleft() const
Definition: bbgrid.h:72
Definition: rect.h:30
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
Definition: bbgrid.h:447

◆ RightEdgeForBox()

int tesseract::TabFind::RightEdgeForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the x-coord that corresponds to the right edge for the given box. If there is a rule line to the right that vertically overlaps it, then return the x-coord of the rule line, otherwise return the right edge of the page. For details see RightTabForBox below.

Definition at line 282 of file tabfind.cpp.

282  {
283  TabVector* v = RightTabForBox(box, crossing, extended);
284  return v == NULL ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
285 }
TabVector * RightTabForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:305
inT16 bottom() const
Definition: rect.h:61
inT16 x() const
access function
Definition: points.h:52
inT16 top() const
Definition: rect.h:54
ICOORD tright_
Definition: bbgrid.h:91

◆ RightTabForBox()

TabVector * tesseract::TabFind::RightTabForBox ( const TBOX &  box,
bool  crossing,
bool  extended 
)

Return the TabVector that corresponds to the right edge for the given box. If there is a TabVector to the right that vertically overlaps it, then return it, otherwise return NULL. Note that Right and Left refer to the position of the TabVector, not its type, ie RightTabForBox returns the nearest TabVector to the right of the box, regardless of its type. If a TabVector crosses right through the box (as opposed to grazing one edge or missing entirely), then crossing false will ignore such a line. Crossing true will return the line for BOTH left and right edges. If extended is true, then TabVectors are considered to extend to their extended_start/end_y, otherwise, just the startpt_ and endpt_. These functions make use of an internal iterator to the vectors_ list for speed when used repeatedly on neighbouring boxes. The caveat is that the iterator must be updated whenever the list is modified.

Definition at line 305 of file tabfind.cpp.

306  {
307  if (v_it_.empty())
308  return NULL;
309  int top_y = box.top();
310  int bottom_y = box.bottom();
311  int mid_y = (top_y + bottom_y) / 2;
312  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
313  int min_key, max_key;
314  SetupTabSearch(right, mid_y, &min_key, &max_key);
315  // Position the iterator at the first TabVector with sort_key >= min_key.
316  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key)
317  v_it_.backward();
318  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key)
319  v_it_.forward();
320  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
321  TabVector* best_v = NULL;
322  int best_x = -1;
323  int key_limit = -1;
324  do {
325  TabVector* v = v_it_.data();
326  int x = v->XAtY(mid_y);
327  if (x >= right &&
328  (v->VOverlap(top_y, bottom_y) > 0 ||
329  (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
330  if (best_v == NULL || x < best_x) {
331  best_v = v;
332  best_x = x;
333  // We can guarantee that no better vector can be found if the
334  // sort key exceeds that of the best by max_key - min_key.
335  key_limit = v->sort_key() + max_key - min_key;
336  }
337  }
338  // Break when the search is done to avoid wrapping the iterator and
339  // thereby potentially slowing the next search.
340  if (v_it_.at_last() ||
341  (best_v != NULL && v->sort_key() > key_limit))
342  break; // Prevent restarting list for next call.
343  v_it_.forward();
344  } while (!v_it_.at_first());
345  return best_v;
346 }
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
inT16 top() const
Definition: rect.h:54
void SetupTabSearch(int x, int y, int *min_key, int *max_key)
Definition: tabfind.cpp:496
inT16 right() const
Definition: rect.h:75

◆ RotateBlobList()

void tesseract::TabFind::RotateBlobList ( const FCOORD rotation,
BLOBNBOX_LIST *  blobs 
)
static protected

Definition at line 1262 of file tabfind.cpp.

1262  {
1263  BLOBNBOX_IT it(blobs);
1264  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1265  it.data()->rotate_box(rotation);
1266  }
1267 }

◆ SetBlobRuleEdges()

void tesseract::TabFind::SetBlobRuleEdges ( BLOBNBOX_LIST *  blobs)

Definition at line 143 of file tabfind.cpp.

143  {
144  BLOBNBOX_IT blob_it(blobs);
145  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
146  BLOBNBOX* blob = blob_it.data();
147  TBOX box = blob->bounding_box();
148  blob->set_left_rule(LeftEdgeForBox(box, false, false));
149  blob->set_right_rule(RightEdgeForBox(box, false, false));
150  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
151  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
152  }
153 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
int RightEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:282
void set_right_rule(int new_right)
Definition: blobbox.h:307
void set_left_rule(int new_left)
Definition: blobbox.h:301
int LeftEdgeForBox(const TBOX &box, bool crossing, bool extended)
Definition: tabfind.cpp:287
TBOX
Definition: rect.h:30
void set_right_crossing_rule(int new_right)
Definition: blobbox.h:319
void set_left_crossing_rule(int new_left)
Definition: blobbox.h:313

◆ SetBlockRuleEdges()

void tesseract::TabFind::SetBlockRuleEdges ( TO_BLOCK block)

Definition at line 134 of file tabfind.cpp.

134  {
135  SetBlobRuleEdges(&block->blobs);
136  SetBlobRuleEdges(&block->small_blobs);
137  SetBlobRuleEdges(&block->noise_blobs);
138  SetBlobRuleEdges(&block->large_blobs);
139 }
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
void SetBlobRuleEdges(BLOBNBOX_LIST *blobs)
Definition: tabfind.cpp:143

◆ SetupTabSearch()

void tesseract::TabFind::SetupTabSearch ( int  x,
int  y,
int *  min_key,
int *  max_key 
)
protected

Definition at line 496 of file tabfind.cpp.

496  {
497  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
498  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
499  *min_key = MIN(key1, key2);
500  *max_key = MAX(key1, key2);
501 }
#define MIN(x, y)
Definition: ndminx.h:28
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280
ICOORD vertical_skew_
Definition: tabfind.h:367
#define MAX(x, y)
Definition: ndminx.h:24
ICOORD tright_
Definition: bbgrid.h:91
inT16 y() const
access_function
Definition: points.h:56

◆ TidyBlobs()

void tesseract::TabFind::TidyBlobs ( TO_BLOCK block)
protected

Definition at line 471 of file tabfind.cpp.

471  {
472  BLOBNBOX_IT large_it = &block->large_blobs;
473  BLOBNBOX_IT blob_it = &block->blobs;
474  int b_count = 0;
475  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
476  BLOBNBOX* large_blob = large_it.data();
477  if (large_blob->owner() != NULL) {
478  blob_it.add_to_end(large_it.extract());
479  ++b_count;
480  }
481  }
482  if (textord_debug_tabfind) {
483  tprintf("Moved %d large blobs to normal list\n",
484  b_count);
485  #ifndef GRAPHICS_DISABLED
486  ScrollView* rej_win = MakeWindow(500, 300, "Image blobs");
487  block->plot_graded_blobs(rej_win);
488  block->plot_noise_blobs(rej_win);
489  rej_win->Update();
490  #endif // GRAPHICS_DISABLED
491  }
492  block->DeleteUnownedNoise();
493 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
static void Update()
Definition: scrollview.cpp:715
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
void DeleteUnownedNoise()
Definition: blobbox.cpp:1033
void plot_noise_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1059
ScrollView * MakeWindow(int x, int y, const char *window_name)
Definition: bbgrid.h:592
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31
void plot_graded_blobs(ScrollView *to_win)
Definition: blobbox.cpp:1067

◆ vectors()

TabVector_LIST* tesseract::TabFind::vectors ( )
inline protected

Accessors

Definition at line 173 of file tabfind.h.

173  {
174  return &vectors_;
175  }

◆ VeryDifferentSizes()

bool tesseract::TabFind::VeryDifferentSizes ( int  size1,
int  size2 
)
static

Return true if the sizes are more than a factor of 5 different.

Definition at line 414 of file tabfind.cpp.

414  {
415  return size1 > size2 * 5 || size2 > size1 * 5;
416 }

◆ WidthCB()

WidthCallback* tesseract::TabFind::WidthCB ( )
inline

Return a callback for testing CommonWidth.

Definition at line 158 of file tabfind.h.

158  {
159  return width_cb_;
160  }

Member Data Documentation

◆ resolution_

int tesseract::TabFind::resolution_
protected

Definition at line 368 of file tabfind.h.

◆ vertical_skew_

ICOORD tesseract::TabFind::vertical_skew_
protected

Definition at line 367 of file tabfind.h.


The documentation for this class was generated from the following files: