tesseract  3.05.02
tesseract::ColPartition Class Reference

#include <colpartition.h>

Inheritance diagram for tesseract::ColPartition:
ELIST2_LINK

Public Member Functions

 ColPartition ()
 
 ColPartition (BlobRegionType blob_type, const ICOORD &vertical)
 
 ~ColPartition ()
 
const TBOX & bounding_box () const
 
int left_margin () const
 
void set_left_margin (int margin)
 
int right_margin () const
 
void set_right_margin (int margin)
 
int median_top () const
 
int median_bottom () const
 
int median_left () const
 
int median_right () const
 
int median_size () const
 
void set_median_size (int size)
 
int median_width () const
 
void set_median_width (int width)
 
BlobRegionType blob_type () const
 
void set_blob_type (BlobRegionType t)
 
BlobTextFlowType flow () const
 
void set_flow (BlobTextFlowType f)
 
int good_blob_score () const
 
bool good_width () const
 
bool good_column () const
 
bool left_key_tab () const
 
int left_key () const
 
bool right_key_tab () const
 
int right_key () const
 
PolyBlockType type () const
 
void set_type (PolyBlockType t)
 
BLOBNBOX_CLIST * boxes ()
 
int boxes_count () const
 
void set_vertical (const ICOORD &v)
 
ColPartition_CLIST * upper_partners ()
 
ColPartition_CLIST * lower_partners ()
 
void set_working_set (WorkingPartSet *working_set)
 
bool block_owned () const
 
void set_block_owned (bool owned)
 
bool desperately_merged () const
 
ColPartitionSet * column_set () const
 
void set_side_step (int step)
 
int bottom_spacing () const
 
void set_bottom_spacing (int spacing)
 
int top_spacing () const
 
void set_top_spacing (int spacing)
 
void set_table_type ()
 
void clear_table_type ()
 
bool inside_table_column ()
 
void set_inside_table_column (bool val)
 
ColPartition * nearest_neighbor_above () const
 
void set_nearest_neighbor_above (ColPartition *part)
 
ColPartition * nearest_neighbor_below () const
 
void set_nearest_neighbor_below (ColPartition *part)
 
int space_above () const
 
void set_space_above (int space)
 
int space_below () const
 
void set_space_below (int space)
 
int space_to_left () const
 
void set_space_to_left (int space)
 
int space_to_right () const
 
void set_space_to_right (int space)
 
uinT8 * color1 ()
 
uinT8 * color2 ()
 
bool owns_blobs () const
 
void set_owns_blobs (bool owns_blobs)
 
int MidY () const
 
int MedianY () const
 
int MidX () const
 
int SortKey (int x, int y) const
 
int XAtY (int sort_key, int y) const
 
int KeyWidth (int left_key, int right_key) const
 
int ColumnWidth () const
 
int BoxLeftKey () const
 
int BoxRightKey () const
 
int LeftAtY (int y) const
 
int RightAtY (int y) const
 
bool IsLeftOf (const ColPartition &other) const
 
bool ColumnContains (int x, int y) const
 
bool IsEmpty () const
 
bool IsSingleton () const
 
bool HOverlaps (const ColPartition &other) const
 
bool VOverlaps (const ColPartition &other) const
 
int VCoreOverlap (const ColPartition &other) const
 
int HCoreOverlap (const ColPartition &other) const
 
bool VSignificantCoreOverlap (const ColPartition &other) const
 
bool WithinSameMargins (const ColPartition &other) const
 
bool TypesMatch (const ColPartition &other) const
 
bool IsLineType () const
 
bool IsImageType () const
 
bool IsTextType () const
 
bool IsPulloutType () const
 
bool IsVerticalType () const
 
bool IsHorizontalType () const
 
bool IsUnMergeableType () const
 
bool IsVerticalLine () const
 
bool IsHorizontalLine () const
 
void AddBox (BLOBNBOX *box)
 
void RemoveBox (BLOBNBOX *box)
 
BLOBNBOX * BiggestBox ()
 
TBOX BoundsWithoutBox (BLOBNBOX *box)
 
void ClaimBoxes ()
 
void DisownBoxes ()
 
void DisownBoxesNoAssert ()
 
bool ReleaseNonLeaderBoxes ()
 
void DeleteBoxes ()
 
void ReflectInYAxis ()
 
bool IsLegal ()
 
bool MatchingColumns (const ColPartition &other) const
 
bool MatchingTextColor (const ColPartition &other) const
 
bool MatchingSizes (const ColPartition &other) const
 
bool ConfirmNoTabViolation (const ColPartition &other) const
 
bool MatchingStrokeWidth (const ColPartition &other, double fractional_tolerance, double constant_tolerance) const
 
bool OKDiacriticMerge (const ColPartition &candidate, bool debug) const
 
void SetLeftTab (const TabVector *tab_vector)
 
void SetRightTab (const TabVector *tab_vector)
 
void CopyLeftTab (const ColPartition &src, bool take_box)
 
void CopyRightTab (const ColPartition &src, bool take_box)
 
int LeftBlobRule () const
 
int RightBlobRule () const
 
float SpecialBlobsDensity (const BlobSpecialTextType type) const
 
int SpecialBlobsCount (const BlobSpecialTextType type)
 
void SetSpecialBlobsDensity (const BlobSpecialTextType type, const float density)
 
void ComputeSpecialBlobsDensity ()
 
void AddPartner (bool upper, ColPartition *partner)
 
void RemovePartner (bool upper, ColPartition *partner)
 
ColPartition * SingletonPartner (bool upper)
 
void Absorb (ColPartition *other, WidthCallback *cb)
 
bool OKMergeOverlap (const ColPartition &merge1, const ColPartition &merge2, int ok_box_overlap, bool debug)
 
BLOBNBOX * OverlapSplitBlob (const TBOX &box)
 
ColPartition * SplitAtBlob (BLOBNBOX *split_blob)
 
ColPartition * SplitAt (int split_x)
 
void ComputeLimits ()
 
int CountOverlappingBoxes (const TBOX &box)
 
void SetPartitionType (int resolution, ColPartitionSet *columns)
 
PolyBlockType PartitionType (ColumnSpanningType flow) const
 
void ColumnRange (int resolution, ColPartitionSet *columns, int *first_col, int *last_col)
 
void SetColumnGoodness (WidthCallback *cb)
 
bool MarkAsLeaderIfMonospaced ()
 
void SetRegionAndFlowTypesFromProjectionValue (int value)
 
void SetBlobTypes ()
 
bool HasGoodBaseline ()
 
void AddToWorkingSet (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *used_parts, WorkingPartSet_LIST *working_set)
 
TO_ROW * MakeToRow ()
 
ColPartition * ShallowCopy () const
 
ColPartition * CopyButDontOwnBlobs ()
 
ScrollView::Color BoxColor () const
 
void Print () const
 
void PrintColors ()
 
void SmoothPartnerRun (int working_set_count)
 
void RefinePartners (PolyBlockType type, bool get_desperate, ColPartitionGrid *grid)
 
bool IsInSameColumnAs (const ColPartition &part) const
 
void set_first_column (int column)
 
void set_last_column (int column)
 
- Public Member Functions inherited from ELIST2_LINK
 ELIST2_LINK ()
 
 ELIST2_LINK (const ELIST2_LINK &)
 
void operator= (const ELIST2_LINK &)
 

Static Public Member Functions

static ColPartition * MakeLinePartition (BlobRegionType blob_type, const ICOORD &vertical, int left, int bottom, int right, int top)
 
static ColPartition * FakePartition (const TBOX &box, PolyBlockType block_type, BlobRegionType blob_type, BlobTextFlowType flow)
 
static ColPartition * MakeBigPartition (BLOBNBOX *box, ColPartition_LIST *big_part_list)
 
static bool TypesMatch (BlobRegionType type1, BlobRegionType type2)
 
static bool TypesSimilar (PolyBlockType type1, PolyBlockType type2)
 
static void LineSpacingBlocks (const ICOORD &bleft, const ICOORD &tright, int resolution, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts, BLOCK_LIST *completed_blocks, TO_BLOCK_LIST *to_blocks)
 
static TO_BLOCK * MakeBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static TO_BLOCK * MakeVerticalTextBlock (const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
 
static int SortByBBox (const void *p1, const void *p2)
 

Detailed Description

ColPartition is a partition of a horizontal slice of the page. It starts out as a collection of blobs at a particular y-coord in the grid, but ends up (after merging and uniquing) as an approximate text line. ColPartitions are also used to hold a partitioning of the page into columns, each representing one column. Although a ColPartition applies to a given y-coordinate range, eventually, a ColPartitionSet of ColPartitions emerges, which represents the columns over a wide y-coordinate range.

Definition at line 67 of file colpartition.h.

Constructor & Destructor Documentation

◆ ColPartition() [1/2]

tesseract::ColPartition::ColPartition ( )
inline

Definition at line 69 of file colpartition.h.

69  {
70  // This empty constructor is here only so that the class can be ELISTIZED.
71  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
72  // and eliminate CLASSNAME##_copier.
73  }

◆ ColPartition() [2/2]

tesseract::ColPartition::ColPartition ( BlobRegionType  blob_type,
const ICOORD &  vertical 
)
Parameters
blob_type  is the blob_region_type_ of the blobs in this partition.
vertical  is the direction of logical vertical on the possibly skewed image.

Definition at line 83 of file colpartition.cpp.

84  : left_margin_(-MAX_INT32), right_margin_(MAX_INT32),
85  median_bottom_(MAX_INT32), median_top_(-MAX_INT32), median_size_(0),
86  median_left_(MAX_INT32), median_right_(-MAX_INT32), median_width_(0),
87  blob_type_(blob_type), flow_(BTFT_NONE), good_blob_score_(0),
88  good_width_(false), good_column_(false),
89  left_key_tab_(false), right_key_tab_(false),
90  left_key_(0), right_key_(0), type_(PT_UNKNOWN), vertical_(vertical),
91  working_set_(NULL), last_add_was_vertical_(false), block_owned_(false),
92  desperately_merged_(false),
93  first_column_(-1), last_column_(-1), column_set_(NULL),
94  side_step_(0), top_spacing_(0), bottom_spacing_(0),
95  type_before_table_(PT_UNKNOWN), inside_table_column_(false),
96  nearest_neighbor_above_(NULL), nearest_neighbor_below_(NULL),
97  space_above_(0), space_below_(0), space_to_left_(0), space_to_right_(0),
98  owns_blobs_(true) {
99  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
100 }
#define MAX_INT32
Definition: host.h:53
BlobRegionType blob_type() const
Definition: colpartition.h:148

◆ ~ColPartition()

tesseract::ColPartition::~ColPartition ( )

Definition at line 145 of file colpartition.cpp.

145  {
146  // Remove this as a partner of all partners, as we don't want them
147  // referring to a deleted object.
148  ColPartition_C_IT it(&upper_partners_);
149  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
150  it.data()->RemovePartner(false, this);
151  }
152  it.set_to_list(&lower_partners_);
153  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
154  it.data()->RemovePartner(true, this);
155  }
156 }

Member Function Documentation

◆ Absorb()

void tesseract::ColPartition::Absorb ( ColPartition *  other,
WidthCallback *  cb 
)

Definition at line 648 of file colpartition.cpp.

648  {
649  // The result has to either own all of the blobs or none of them.
650  // Verify the flag is consisent.
651  ASSERT_HOST(owns_blobs() == other->owns_blobs());
652  // TODO(nbeato): check owns_blobs better. Right now owns_blobs
653  // should always be true when this is called. So there is no issues.
654  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
655  bounding_box_.bottom()) ||
656  TabFind::WithinTestRegion(2, other->bounding_box_.left(),
657  other->bounding_box_.bottom())) {
658  tprintf("Merging:");
659  Print();
660  other->Print();
661  }
662 
663  // Update the special_blobs_densities_.
664  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
665  for (int type = 0; type < BSTT_COUNT; ++type) {
666  int w1 = boxes_.length(), w2 = other->boxes_.length();
667  float new_val = special_blobs_densities_[type] * w1 +
668  other->special_blobs_densities_[type] * w2;
669  if (!w1 || !w2) {
670  special_blobs_densities_[type] = new_val / (w1 + w2);
671  }
672  }
673 
674  // Merge the two sorted lists.
675  BLOBNBOX_C_IT it(&boxes_);
676  BLOBNBOX_C_IT it2(&other->boxes_);
677  for (; !it2.empty(); it2.forward()) {
678  BLOBNBOX* bbox2 = it2.extract();
679  ColPartition* prev_owner = bbox2->owner();
680  if (prev_owner != other && prev_owner != NULL) {
681  // A blob on other's list is owned by someone else; let them have it.
682  continue;
683  }
684  ASSERT_HOST(prev_owner == other || prev_owner == NULL);
685  if (prev_owner == other)
686  bbox2->set_owner(this);
687  it.add_to_end(bbox2);
688  }
689  left_margin_ = MIN(left_margin_, other->left_margin_);
690  right_margin_ = MAX(right_margin_, other->right_margin_);
691  if (other->left_key_ < left_key_) {
692  left_key_ = other->left_key_;
693  left_key_tab_ = other->left_key_tab_;
694  }
695  if (other->right_key_ > right_key_) {
696  right_key_ = other->right_key_;
697  right_key_tab_ = other->right_key_tab_;
698  }
699  // Combine the flow and blob_type in a sensible way.
700  // Dominant flows stay.
701  if (!DominatesInMerge(flow_, other->flow_)) {
702  flow_ = other->flow_;
703  blob_type_ = other->blob_type_;
704  }
705  SetBlobTypes();
706  if (IsVerticalType()) {
707  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
708  last_add_was_vertical_ = true;
709  } else {
710  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
711  last_add_was_vertical_ = false;
712  }
713  ComputeLimits();
714  // Fix partner lists. other is going away, so remove it as a
715  // partner of all its partners and add this in its place.
716  for (int upper = 0; upper < 2; ++upper) {
717  ColPartition_CLIST partners;
718  ColPartition_C_IT part_it(&partners);
719  part_it.add_list_after(upper ? &other->upper_partners_
720  : &other->lower_partners_);
721  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
722  ColPartition* partner = part_it.extract();
723  partner->RemovePartner(!upper, other);
724  partner->RemovePartner(!upper, this);
725  partner->AddPartner(!upper, this);
726  }
727  }
728  delete other;
729  if (cb != NULL) {
730  SetColumnGoodness(cb);
731  }
732 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
bool IsVerticalType() const
Definition: colpartition.h:435
static bool WithinTestRegion(int detail_level, int x, int y)
PolyBlockType type() const
Definition: colpartition.h:181
#define MIN(x, y)
Definition: ndminx.h:28
bool DominatesInMerge(BlobTextFlowType type1, BlobTextFlowType type2)
Definition: blobbox.h:114
void SetColumnGoodness(WidthCallback *cb)
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
#define MAX(x, y)
Definition: ndminx.h:24
#define tprintf(...)
Definition: tprintf.h:31
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ AddBox()

void tesseract::ColPartition::AddBox ( BLOBNBOX *  box)

Definition at line 179 of file colpartition.cpp.

179  {
180  TBOX box = bbox->bounding_box();
181  // Update the partition limits.
182  if (boxes_.length() == 0) {
183  bounding_box_ = box;
184  } else {
185  bounding_box_ += box;
186  }
187 
188  if (IsVerticalType()) {
189  if (!last_add_was_vertical_) {
190  boxes_.sort(SortByBoxBottom<BLOBNBOX>);
191  last_add_was_vertical_ = true;
192  }
193  boxes_.add_sorted(SortByBoxBottom<BLOBNBOX>, true, bbox);
194  } else {
195  if (last_add_was_vertical_) {
196  boxes_.sort(SortByBoxLeft<BLOBNBOX>);
197  last_add_was_vertical_ = false;
198  }
199  boxes_.add_sorted(SortByBoxLeft<BLOBNBOX>, true, bbox);
200  }
201  if (!left_key_tab_)
202  left_key_ = BoxLeftKey();
203  if (!right_key_tab_)
204  right_key_ = BoxRightKey();
205  if (TabFind::WithinTestRegion(2, box.left(), box.bottom()))
206  tprintf("Added box (%d,%d)->(%d,%d) left_blob_x_=%d, right_blob_x_ = %d\n",
207  box.left(), box.bottom(), box.right(), box.top(),
208  bounding_box_.left(), bounding_box_.right());
209 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool IsVerticalType() const
Definition: colpartition.h:435
static bool WithinTestRegion(int detail_level, int x, int y)
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ AddPartner()

void tesseract::ColPartition::AddPartner ( bool  upper,
ColPartition *  partner 
)

Definition at line 613 of file colpartition.cpp.

613  {
614  if (upper) {
615  partner->lower_partners_.add_sorted(SortByBoxLeft<ColPartition>,
616  true, this);
617  upper_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
618  } else {
619  partner->upper_partners_.add_sorted(SortByBoxLeft<ColPartition>,
620  true, this);
621  lower_partners_.add_sorted(SortByBoxLeft<ColPartition>, true, partner);
622  }
623 }

◆ AddToWorkingSet()

void tesseract::ColPartition::AddToWorkingSet ( const ICOORD &  bleft,
const ICOORD &  tright,
int  resolution,
ColPartition_LIST *  used_parts,
WorkingPartSet_LIST *  working_set 
)

Definition at line 1354 of file colpartition.cpp.

1357  {
1358  if (block_owned_)
1359  return; // Done it already.
1360  block_owned_ = true;
1361  WorkingPartSet_IT it(working_sets);
1362  // If there is an upper partner use its working_set_ directly.
1363  ColPartition* partner = SingletonPartner(true);
1364  if (partner != NULL && partner->working_set_ != NULL) {
1365  working_set_ = partner->working_set_;
1366  working_set_->AddPartition(this);
1367  return;
1368  }
1369  if (partner != NULL && textord_debug_bugs) {
1370  tprintf("Partition with partner has no working set!:");
1371  Print();
1372  partner->Print();
1373  }
1374  // Search for the column that the left edge fits in.
1375  WorkingPartSet* work_set = NULL;
1376  it.move_to_first();
1377  int col_index = 0;
1378  for (it.mark_cycle_pt(); !it.cycled_list() &&
1379  col_index != first_column_;
1380  it.forward(), ++col_index);
1381  if (textord_debug_tabfind >= 2) {
1382  tprintf("Match is %s for:", (col_index & 1) ? "Real" : "Between");
1383  Print();
1384  }
1385  if (it.cycled_list() && textord_debug_bugs) {
1386  tprintf("Target column=%d, only had %d\n", first_column_, col_index);
1387  }
1388  ASSERT_HOST(!it.cycled_list());
1389  work_set = it.data();
1390  // If last_column_ != first_column, then we need to scoop up all blocks
1391  // between here and the last_column_ and put back in work_set.
1392  if (!it.cycled_list() && last_column_ != first_column_ && !IsPulloutType()) {
1393  // Find the column that the right edge falls in.
1394  BLOCK_LIST completed_blocks;
1395  TO_BLOCK_LIST to_blocks;
1396  for (; !it.cycled_list() && col_index <= last_column_;
1397  it.forward(), ++col_index) {
1398  WorkingPartSet* end_set = it.data();
1399  end_set->ExtractCompletedBlocks(bleft, tright, resolution, used_parts,
1400  &completed_blocks, &to_blocks);
1401  }
1402  work_set->InsertCompletedBlocks(&completed_blocks, &to_blocks);
1403  }
1404  working_set_ = work_set;
1405  work_set->AddPartition(this);
1406 }
int textord_debug_bugs
Definition: alignedblob.cpp:28
void AddPartition(ColPartition *part)
ColPartition * SingletonPartner(bool upper)
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31
bool IsPulloutType() const
Definition: colpartition.h:431
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ BiggestBox()

BLOBNBOX * tesseract::ColPartition::BiggestBox ( )

Definition at line 225 of file colpartition.cpp.

225  {
226  BLOBNBOX* biggest = NULL;
227  BLOBNBOX_C_IT bb_it(&boxes_);
228  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
229  BLOBNBOX* bbox = bb_it.data();
230  if (IsVerticalType()) {
231  if (biggest == NULL ||
232  bbox->bounding_box().width() > biggest->bounding_box().width())
233  biggest = bbox;
234  } else {
235  if (biggest == NULL ||
236  bbox->bounding_box().height() > biggest->bounding_box().height())
237  biggest = bbox;
238  }
239  }
240  return biggest;
241 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool IsVerticalType() const
Definition: colpartition.h:435
inT16 width() const
Definition: rect.h:111
inT16 height() const
Definition: rect.h:104

◆ blob_type()

BlobRegionType tesseract::ColPartition::blob_type ( ) const
inline

Definition at line 148 of file colpartition.h.

148  {
149  return blob_type_;
150  }

◆ block_owned()

bool tesseract::ColPartition::block_owned ( ) const
inline

Definition at line 205 of file colpartition.h.

205  {
206  return block_owned_;
207  }

◆ bottom_spacing()

int tesseract::ColPartition::bottom_spacing ( ) const
inline

Definition at line 220 of file colpartition.h.

220  {
221  return bottom_spacing_;
222  }

◆ bounding_box()

const TBOX& tesseract::ColPartition::bounding_box ( ) const
inline

Definition at line 109 of file colpartition.h.

109  {
110  return bounding_box_;
111  }

◆ BoundsWithoutBox()

TBOX tesseract::ColPartition::BoundsWithoutBox ( BLOBNBOX *  box)

Definition at line 244 of file colpartition.cpp.

244  {
245  TBOX result;
246  BLOBNBOX_C_IT bb_it(&boxes_);
247  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
248  if (box != bb_it.data()) {
249  result += bb_it.data()->bounding_box();
250  }
251  }
252  return result;
253 }
Definition: rect.h:30

◆ BoxColor()

ScrollView::Color tesseract::ColPartition::BoxColor ( ) const

Definition at line 1778 of file colpartition.cpp.

1778  {
1779  if (type_ == PT_UNKNOWN)
1780  return BLOBNBOX::TextlineColor(blob_type_, flow_);
1781  return POLY_BLOCK::ColorForPolyBlockType(type_);
1782 }
static ScrollView::Color ColorForPolyBlockType(PolyBlockType type)
Returns a color to draw the given type.
Definition: polyblk.cpp:397
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
Definition: blobbox.cpp:439

◆ boxes()

BLOBNBOX_CLIST* tesseract::ColPartition::boxes ( )
inline

Definition at line 187 of file colpartition.h.

187  {
188  return &boxes_;
189  }

◆ boxes_count()

int tesseract::ColPartition::boxes_count ( ) const
inline

Definition at line 190 of file colpartition.h.

190  {
191  return boxes_.length();
192  }

◆ BoxLeftKey()

int tesseract::ColPartition::BoxLeftKey ( ) const
inline

Definition at line 332 of file colpartition.h.

332  {
333  return SortKey(bounding_box_.left(), MidY());
334  }
inT16 left() const
Definition: rect.h:68
int SortKey(int x, int y) const
Definition: colpartition.h:316

◆ BoxRightKey()

int tesseract::ColPartition::BoxRightKey ( ) const
inline

Definition at line 336 of file colpartition.h.

336  {
337  return SortKey(bounding_box_.right(), MidY());
338  }
int SortKey(int x, int y) const
Definition: colpartition.h:316
inT16 right() const
Definition: rect.h:75

◆ ClaimBoxes()

void tesseract::ColPartition::ClaimBoxes ( )

Definition at line 257 of file colpartition.cpp.

257  {
258  BLOBNBOX_C_IT bb_it(&boxes_);
259  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
260  BLOBNBOX* bblob = bb_it.data();
261  ColPartition* other = bblob->owner();
262  if (other == NULL) {
263  // Normal case: ownership is available.
264  bblob->set_owner(this);
265  } else {
266  ASSERT_HOST(other == this);
267  }
268  }
269 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ clear_table_type()

void tesseract::ColPartition::clear_table_type ( )
inline

Definition at line 239 of file colpartition.h.

239  {
240  if (type_ == PT_TABLE)
241  type_ = type_before_table_;
242  }
Definition: capi.h:97

◆ color1()

uinT8* tesseract::ColPartition::color1 ( )
inline

Definition at line 285 of file colpartition.h.

285  {
286  return color1_;
287  }

◆ color2()

uinT8* tesseract::ColPartition::color2 ( )
inline

Definition at line 288 of file colpartition.h.

288  {
289  return color2_;
290  }

◆ column_set()

ColPartitionSet* tesseract::ColPartition::column_set ( ) const
inline

Definition at line 214 of file colpartition.h.

214  {
215  return column_set_;
216  }

◆ ColumnContains()

bool tesseract::ColPartition::ColumnContains ( int  x,
int  y 
) const
inline

Definition at line 353 of file colpartition.h.

353  {
354  return LeftAtY(y) - 1 <= x && x <= RightAtY(y) + 1;
355  }
int LeftAtY(int y) const
Definition: colpartition.h:340
int RightAtY(int y) const
Definition: colpartition.h:344

◆ ColumnRange()

void tesseract::ColPartition::ColumnRange ( int  resolution,
ColPartitionSet *  columns,
int *  first_col,
int *  last_col 
)

Definition at line 1064 of file colpartition.cpp.

1065  {
1066  int first_spanned_col = -1;
1067  ColumnSpanningType span_type =
1068  columns->SpanningType(resolution,
1069  bounding_box_.left(), bounding_box_.right(),
1070  MIN(bounding_box_.height(), bounding_box_.width()),
1071  MidY(), left_margin_, right_margin_,
1072  first_col, last_col,
1073  &first_spanned_col);
1074  type_ = PartitionType(span_type);
1075 }
inT16 width() const
Definition: rect.h:111
#define MIN(x, y)
Definition: ndminx.h:28
PolyBlockType PartitionType(ColumnSpanningType flow) const
inT16 left() const
Definition: rect.h:68
inT16 height() const
Definition: rect.h:104
inT16 right() const
Definition: rect.h:75

◆ ColumnWidth()

int tesseract::ColPartition::ColumnWidth ( ) const
inline

Definition at line 328 of file colpartition.h.

328  {
329  return KeyWidth(left_key_, right_key_);
330  }
int KeyWidth(int left_key, int right_key) const
Definition: colpartition.h:324

◆ ComputeLimits()

void tesseract::ColPartition::ComputeLimits ( )

Definition at line 869 of file colpartition.cpp.

869  {
870  bounding_box_ = TBOX(); // Clear it
871  BLOBNBOX_C_IT it(&boxes_);
872  BLOBNBOX* bbox = NULL;
873  int non_leader_count = 0;
874  if (it.empty()) {
875  bounding_box_.set_left(left_margin_);
876  bounding_box_.set_right(right_margin_);
877  bounding_box_.set_bottom(0);
878  bounding_box_.set_top(0);
879  } else {
880  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
881  bbox = it.data();
882  bounding_box_ += bbox->bounding_box();
883  if (bbox->flow() != BTFT_LEADER)
884  ++non_leader_count;
885  }
886  }
887  if (!left_key_tab_)
888  left_key_ = BoxLeftKey();
889  if (left_key_ > BoxLeftKey() && textord_debug_bugs) {
890  // TODO(rays) investigate the causes of these error messages, to find
891  // out if they are genuinely harmful, or just indicative of junk input.
892  tprintf("Computed left-illegal partition\n");
893  Print();
894  }
895  if (!right_key_tab_)
896  right_key_ = BoxRightKey();
897  if (right_key_ < BoxRightKey() && textord_debug_bugs) {
898  tprintf("Computed right-illegal partition\n");
899  Print();
900  }
901  if (it.empty())
902  return;
903  if (IsImageType() || blob_type() == BRT_RECTIMAGE ||
904  blob_type() == BRT_POLYIMAGE) {
905  median_top_ = bounding_box_.top();
906  median_bottom_ = bounding_box_.bottom();
907  median_size_ = bounding_box_.height();
908  median_left_ = bounding_box_.left();
909  median_right_ = bounding_box_.right();
910  median_width_ = bounding_box_.width();
911  } else {
912  STATS top_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
913  STATS bottom_stats(bounding_box_.bottom(), bounding_box_.top() + 1);
914  STATS size_stats(0, bounding_box_.height() + 1);
915  STATS left_stats(bounding_box_.left(), bounding_box_.right() + 1);
916  STATS right_stats(bounding_box_.left(), bounding_box_.right() + 1);
917  STATS width_stats(0, bounding_box_.width() + 1);
918  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
919  bbox = it.data();
920  if (non_leader_count == 0 || bbox->flow() != BTFT_LEADER) {
921  const TBOX& box = bbox->bounding_box();
922  int area = box.area();
923  top_stats.add(box.top(), area);
924  bottom_stats.add(box.bottom(), area);
925  size_stats.add(box.height(), area);
926  left_stats.add(box.left(), area);
927  right_stats.add(box.right(), area);
928  width_stats.add(box.width(), area);
929  }
930  }
931  median_top_ = static_cast<int>(top_stats.median() + 0.5);
932  median_bottom_ = static_cast<int>(bottom_stats.median() + 0.5);
933  median_size_ = static_cast<int>(size_stats.median() + 0.5);
934  median_left_ = static_cast<int>(left_stats.median() + 0.5);
935  median_right_ = static_cast<int>(right_stats.median() + 0.5);
936  median_width_ = static_cast<int>(width_stats.median() + 0.5);
937  }
938 
939  if (right_margin_ < bounding_box_.right() && textord_debug_bugs) {
940  tprintf("Made partition with bad right coords");
941  Print();
942  }
943  if (left_margin_ > bounding_box_.left() && textord_debug_bugs) {
944  tprintf("Made partition with bad left coords");
945  Print();
946  }
947  // Fix partner lists. The bounding box has changed and partners are stored
948  // in bounding box order, so remove and reinsert this as a partner
949  // of all its partners.
950  for (int upper = 0; upper < 2; ++upper) {
951  ColPartition_CLIST partners;
952  ColPartition_C_IT part_it(&partners);
953  part_it.add_list_after(upper ? &upper_partners_ : &lower_partners_);
954  for (part_it.move_to_first(); !part_it.empty(); part_it.forward()) {
955  ColPartition* partner = part_it.extract();
956  partner->RemovePartner(!upper, this);
957  partner->AddPartner(!upper, this);
958  }
959  }
960  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
961  bounding_box_.bottom())) {
962  tprintf("Recomputed box for partition %p\n", this);
963  Print();
964  }
965 }
void set_bottom(int y)
Definition: rect.h:64
const TBOX & bounding_box() const
Definition: blobbox.h:215
static bool WithinTestRegion(int detail_level, int x, int y)
inT16 width() const
Definition: rect.h:111
int textord_debug_bugs
Definition: alignedblob.cpp:28
BlobTextFlowType flow() const
Definition: blobbox.h:280
void set_left(int x)
Definition: rect.h:71
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
inT32 area() const
Definition: rect.h:118
inT16 height() const
Definition: rect.h:104
void set_right(int x)
Definition: rect.h:78
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
bool IsImageType() const
Definition: colpartition.h:423
void set_top(int y)
Definition: rect.h:57
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
BlobRegionType blob_type() const
Definition: colpartition.h:148
Definition: statistc.h:33

◆ ComputeSpecialBlobsDensity()

void tesseract::ColPartition::ComputeSpecialBlobsDensity ( )

Definition at line 592 of file colpartition.cpp.

592  {
593  memset(special_blobs_densities_, 0, sizeof(special_blobs_densities_));
594  if (boxes_.empty()) {
595  return;
596  }
597 
598  BLOBNBOX_C_IT blob_it(&boxes_);
599  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
600  BLOBNBOX* blob = blob_it.data();
601  BlobSpecialTextType type = blob->special_text_type();
602  special_blobs_densities_[type]++;
603  }
604 
605  for (int type = 0; type < BSTT_COUNT; ++type) {
606  special_blobs_densities_[type] /= boxes_.length();
607  }
608 }
PolyBlockType type() const
Definition: colpartition.h:181
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:274
BlobSpecialTextType
Definition: blobbox.h:81

◆ ConfirmNoTabViolation()

bool tesseract::ColPartition::ConfirmNoTabViolation ( const ColPartition &  other) const

Definition at line 423 of file colpartition.cpp.

423  {
424  if (bounding_box_.right() < other.bounding_box_.left() &&
425  bounding_box_.right() < other.LeftBlobRule())
426  return false;
427  if (other.bounding_box_.right() < bounding_box_.left() &&
428  other.bounding_box_.right() < LeftBlobRule())
429  return false;
430  if (bounding_box_.left() > other.bounding_box_.right() &&
431  bounding_box_.left() > other.RightBlobRule())
432  return false;
433  if (other.bounding_box_.left() > bounding_box_.right() &&
434  other.bounding_box_.left() > RightBlobRule())
435  return false;
436  return true;
437 }
inT16 left() const
Definition: rect.h:68
inT16 right() const
Definition: rect.h:75

◆ CopyButDontOwnBlobs()

ColPartition * tesseract::ColPartition::CopyButDontOwnBlobs ( )

Definition at line 1765 of file colpartition.cpp.

1765  {
1766  ColPartition* copy = ShallowCopy();
1767  copy->set_owns_blobs(false);
1768  BLOBNBOX_C_IT inserter(copy->boxes());
1769  BLOBNBOX_C_IT traverser(boxes());
1770  for (traverser.mark_cycle_pt(); !traverser.cycled_list(); traverser.forward())
1771  inserter.add_after_then_move(traverser.data());
1772  return copy;
1773 }
ColPartition * ShallowCopy() const
BLOBNBOX_CLIST * boxes()
Definition: colpartition.h:187

◆ CopyLeftTab()

void tesseract::ColPartition::CopyLeftTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 529 of file colpartition.cpp.

529  {
530  left_key_tab_ = take_box ? false : src.left_key_tab_;
531  if (left_key_tab_) {
532  left_key_ = src.left_key_;
533  } else {
534  bounding_box_.set_left(XAtY(src.BoxLeftKey(), MidY()));
535  left_key_ = BoxLeftKey();
536  }
537  if (left_margin_ > bounding_box_.left())
538  left_margin_ = src.left_margin_;
539 }
void set_left(int x)
Definition: rect.h:71
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
inT16 left() const
Definition: rect.h:68

◆ CopyRightTab()

void tesseract::ColPartition::CopyRightTab ( const ColPartition & src,
bool  take_box 
)

Definition at line 542 of file colpartition.cpp.

542  {
543  right_key_tab_ = take_box ? false : src.right_key_tab_;
544  if (right_key_tab_) {
545  right_key_ = src.right_key_;
546  } else {
547  bounding_box_.set_right(XAtY(src.BoxRightKey(), MidY()));
548  right_key_ = BoxRightKey();
549  }
550  if (right_margin_ < bounding_box_.right())
551  right_margin_ = src.right_margin_;
552 }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320
void set_right(int x)
Definition: rect.h:78
inT16 right() const
Definition: rect.h:75

◆ CountOverlappingBoxes()

int tesseract::ColPartition::CountOverlappingBoxes ( const TBOX & box)

Definition at line 968 of file colpartition.cpp.

968  {
969  BLOBNBOX_C_IT it(&boxes_);
970  int overlap_count = 0;
971  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
972  BLOBNBOX* bbox = it.data();
973  if (box.overlap(bbox->bounding_box()))
974  ++overlap_count;
975  }
976  return overlap_count;
977 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool overlap(const TBOX &box) const
Definition: rect.h:345

◆ DeleteBoxes()

void tesseract::ColPartition::DeleteBoxes ( )

Definition at line 315 of file colpartition.cpp.

315  {
316  // Although the boxes_ list is a C_LIST, in some cases it owns the
317  // BLOBNBOXes, as the ColPartition takes ownership from the grid,
318  // and the BLOBNBOXes own the underlying C_BLOBs.
319  for (BLOBNBOX_C_IT bb_it(&boxes_); !bb_it.empty(); bb_it.forward()) {
320  BLOBNBOX* bblob = bb_it.extract();
321  delete bblob->cblob();
322  delete bblob;
323  }
324 }
C_BLOB * cblob() const
Definition: blobbox.h:253

◆ desperately_merged()

bool tesseract::ColPartition::desperately_merged ( ) const
inline

Definition at line 211 of file colpartition.h.

211  {
212  return desperately_merged_;
213  }

◆ DisownBoxes()

void tesseract::ColPartition::DisownBoxes ( )

Definition at line 273 of file colpartition.cpp.

273  {
274  BLOBNBOX_C_IT bb_it(&boxes_);
275  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
276  BLOBNBOX* bblob = bb_it.data();
277  ASSERT_HOST(bblob->owner() == this || bblob->owner() == NULL);
278  bblob->set_owner(NULL);
279  }
280 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ DisownBoxesNoAssert()

void tesseract::ColPartition::DisownBoxesNoAssert ( )

Definition at line 286 of file colpartition.cpp.

286  {
287  BLOBNBOX_C_IT bb_it(&boxes_);
288  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
289  BLOBNBOX* bblob = bb_it.data();
290  if (bblob->owner() == this)
291  bblob->set_owner(NULL);
292  }
293 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340

◆ FakePartition()

ColPartition * tesseract::ColPartition::FakePartition ( const TBOX & box,
PolyBlockType  block_type,
BlobRegionType  blob_type,
BlobTextFlowType  flow 
)
static

Definition at line 107 of file colpartition.cpp.

110  {
111  ColPartition* part = new ColPartition(blob_type, ICOORD(0, 1));
112  part->set_type(block_type);
113  part->set_flow(flow);
114  part->AddBox(new BLOBNBOX(C_BLOB::FakeBlob(box)));
115  part->set_left_margin(box.left());
116  part->set_right_margin(box.right());
117  part->SetBlobTypes();
118  part->ComputeLimits();
119  part->ClaimBoxes();
120  return part;
121 }
integer coordinate
Definition: points.h:30
inT16 left() const
Definition: rect.h:68
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
inT16 right() const
Definition: rect.h:75
BlobTextFlowType flow() const
Definition: colpartition.h:154
BlobRegionType blob_type() const
Definition: colpartition.h:148

◆ flow()

BlobTextFlowType tesseract::ColPartition::flow ( ) const
inline

Definition at line 154 of file colpartition.h.

154  {
155  return flow_;
156  }

◆ good_blob_score()

int tesseract::ColPartition::good_blob_score ( ) const
inline

Definition at line 160 of file colpartition.h.

160  {
161  return good_blob_score_;
162  }

◆ good_column()

bool tesseract::ColPartition::good_column ( ) const
inline

Definition at line 166 of file colpartition.h.

166  {
167  return good_column_;
168  }

◆ good_width()

bool tesseract::ColPartition::good_width ( ) const
inline

Definition at line 163 of file colpartition.h.

163  {
164  return good_width_;
165  }

◆ HasGoodBaseline()

bool tesseract::ColPartition::HasGoodBaseline ( )

Definition at line 1289 of file colpartition.cpp.

1289  {
1290  // Approximation of the baseline.
1291  DetLineFit linepoints;
1292  // Calculation of the mean height on this line segment. Note that these
1293  // variable names apply to the context of a horizontal line, and work
1294  // analogously, rather than literally in the case of a vertical line.
1295  int total_height = 0;
1296  int coverage = 0;
1297  int height_count = 0;
1298  int width = 0;
1299  BLOBNBOX_C_IT it(&boxes_);
1300  TBOX box(it.data()->bounding_box());
1301  // Accumulate points representing the baseline at the middle of each blob,
1302  // but add an additional point for each end of the line. This makes it
1303  // harder to fit a severe skew angle, as it is most likely not right.
1304  if (IsVerticalType()) {
1305  // For a vertical line, use the right side as the baseline.
1306  ICOORD first_pt(box.right(), box.bottom());
1307  // Use the bottom-right of the first (bottom) box, the top-right of the
1308  // last, and the middle-right of all others.
1309  linepoints.Add(first_pt);
1310  for (it.forward(); !it.at_last(); it.forward()) {
1311  BLOBNBOX* blob = it.data();
1312  box = blob->bounding_box();
1313  ICOORD box_pt(box.right(), (box.top() + box.bottom()) / 2);
1314  linepoints.Add(box_pt);
1315  total_height += box.width();
1316  coverage += box.height();
1317  ++height_count;
1318  }
1319  box = it.data()->bounding_box();
1320  ICOORD last_pt(box.right(), box.top());
1321  linepoints.Add(last_pt);
1322  width = last_pt.y() - first_pt.y();
1323 
1324  } else {
1325  // Horizontal lines use the bottom as the baseline.
1326  TBOX box(it.data()->bounding_box());
1327  // Use the bottom-left of the first box, the bottom-right of the last,
1328  // and the middle of all others.
1329  ICOORD first_pt(box.left(), box.bottom());
1330  linepoints.Add(first_pt);
1331  for (it.forward(); !it.at_last(); it.forward()) {
1332  BLOBNBOX* blob = it.data();
1333  box = blob->bounding_box();
1334  ICOORD box_pt((box.left() + box.right()) / 2, box.bottom());
1335  linepoints.Add(box_pt);
1336  total_height += box.height();
1337  coverage += box.width();
1338  ++height_count;
1339  }
1340  box = it.data()->bounding_box();
1341  ICOORD last_pt(box.right(), box.bottom());
1342  linepoints.Add(last_pt);
1343  width = last_pt.x() - first_pt.x();
1344  }
1345  // Maximum median error allowed to be a good text line.
1346  double max_error = kMaxBaselineError * total_height / height_count;
1347  ICOORD start_pt, end_pt;
1348  double error = linepoints.Fit(&start_pt, &end_pt);
1349  return error < max_error && coverage >= kMinBaselineCoverage * width;
1350 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool IsVerticalType() const
Definition: colpartition.h:435
integer coordinate
Definition: points.h:30
const double kMaxBaselineError
inT16 x() const
access function
Definition: points.h:52
Definition: rect.h:30
const double kMinBaselineCoverage
inT16 y() const
access_function
Definition: points.h:56

◆ HCoreOverlap()

int tesseract::ColPartition::HCoreOverlap ( const ColPartition & other) const
inline

Definition at line 381 of file colpartition.h.

381  {
382  return MIN(median_right_, other.median_right_) -
383  MAX(median_left_, other.median_left_);
384  }
#define MIN(x, y)
Definition: ndminx.h:28
#define MAX(x, y)
Definition: ndminx.h:24

◆ HOverlaps()

bool tesseract::ColPartition::HOverlaps ( const ColPartition & other) const
inline

Definition at line 365 of file colpartition.h.

365  {
366  return bounding_box_.x_overlap(other.bounding_box_);
367  }
bool x_overlap(const TBOX &box) const
Definition: rect.h:391

◆ inside_table_column()

bool tesseract::ColPartition::inside_table_column ( )
inline

Definition at line 243 of file colpartition.h.

243  {
244  return inside_table_column_;
245  }

◆ IsEmpty()

bool tesseract::ColPartition::IsEmpty ( ) const
inline

Definition at line 357 of file colpartition.h.

357  {
358  return boxes_.empty();
359  }

◆ IsHorizontalLine()

bool tesseract::ColPartition::IsHorizontalLine ( ) const
inline

Definition at line 453 of file colpartition.h.

453  {
454  return IsHorizontalType() && IsLineType();
455  }
bool IsHorizontalType() const
Definition: colpartition.h:439

◆ IsHorizontalType()

bool tesseract::ColPartition::IsHorizontalType ( ) const
inline

Definition at line 439 of file colpartition.h.

439  {
440  return blob_type_ == BRT_TEXT || blob_type_ == BRT_HLINE;
441  }

◆ IsImageType()

bool tesseract::ColPartition::IsImageType ( ) const
inline

Definition at line 423 of file colpartition.h.

423  {
424  return PTIsImageType(type_);
425  }
bool PTIsImageType(PolyBlockType type)
Definition: publictypes.h:65

◆ IsInSameColumnAs()

bool tesseract::ColPartition::IsInSameColumnAs ( const ColPartition & part) const

Definition at line 2182 of file colpartition.cpp.

2182  {
2183  // Overlap does not occur when last < part.first or first > part.last.
2184  // In other words, one is completely to the side of the other.
2185  // This is just DeMorgan's law applied to that so the function returns true.
2186  return (last_column_ >= part.first_column_) &&
2187  (first_column_ <= part.last_column_);
2188 }

◆ IsLeftOf()

bool tesseract::ColPartition::IsLeftOf ( const ColPartition & other) const
inline

Definition at line 349 of file colpartition.h.

349  {
350  return bounding_box_.right() < other.bounding_box_.right();
351  }
inT16 right() const
Definition: rect.h:75

◆ IsLegal()

bool tesseract::ColPartition::IsLegal ( )

Definition at line 352 of file colpartition.cpp.

352  {
353  if (bounding_box_.left() > bounding_box_.right()) {
354  if (textord_debug_bugs) {
355  tprintf("Bounding box invalid\n");
356  Print();
357  }
358  return false; // Bounding box invalid.
359  }
360  if (left_margin_ > bounding_box_.left() ||
361  right_margin_ < bounding_box_.right()) {
362  if (textord_debug_bugs) {
363  tprintf("Margins invalid\n");
364  Print();
365  }
366  return false; // Margins invalid.
367  }
368  if (left_key_ > BoxLeftKey() || right_key_ < BoxRightKey()) {
369  if (textord_debug_bugs) {
370  tprintf("Key inside box: %d v %d or %d v %d\n",
371  left_key_, BoxLeftKey(), right_key_, BoxRightKey());
372  Print();
373  }
374  return false; // Keys inside the box.
375  }
376  return true;
377 }
int textord_debug_bugs
Definition: alignedblob.cpp:28
inT16 left() const
Definition: rect.h:68
#define tprintf(...)
Definition: tprintf.h:31
inT16 right() const
Definition: rect.h:75

◆ IsLineType()

bool tesseract::ColPartition::IsLineType ( ) const
inline

Definition at line 419 of file colpartition.h.

419  {
420  return PTIsLineType(type_);
421  }
bool PTIsLineType(PolyBlockType type)
Definition: publictypes.h:61

◆ IsPulloutType()

bool tesseract::ColPartition::IsPulloutType ( ) const
inline

Definition at line 431 of file colpartition.h.

431  {
432  return PTIsPulloutType(type_);
433  }
bool PTIsPulloutType(PolyBlockType type)
Definition: publictypes.h:77

◆ IsSingleton()

bool tesseract::ColPartition::IsSingleton ( ) const
inline

Definition at line 361 of file colpartition.h.

361  {
362  return boxes_.singleton();
363  }

◆ IsTextType()

bool tesseract::ColPartition::IsTextType ( ) const
inline

Definition at line 427 of file colpartition.h.

427  {
428  return PTIsTextType(type_);
429  }
bool PTIsTextType(PolyBlockType type)
Definition: publictypes.h:70

◆ IsUnMergeableType()

bool tesseract::ColPartition::IsUnMergeableType ( ) const
inline

Definition at line 443 of file colpartition.h.

443  {
444  return BLOBNBOX::UnMergeableType(blob_type_) || type_ == PT_NOISE;
445  }
Definition: capi.h:98
static bool UnMergeableType(BlobRegionType type)
Definition: blobbox.h:415

◆ IsVerticalLine()

bool tesseract::ColPartition::IsVerticalLine ( ) const
inline

Definition at line 448 of file colpartition.h.

448  {
449  return IsVerticalType() && IsLineType();
450  }
bool IsVerticalType() const
Definition: colpartition.h:435

◆ IsVerticalType()

bool tesseract::ColPartition::IsVerticalType ( ) const
inline

Definition at line 435 of file colpartition.h.

435  {
436  return blob_type_ == BRT_VERT_TEXT || blob_type_ == BRT_VLINE;
437  }

◆ KeyWidth()

int tesseract::ColPartition::KeyWidth ( int  left_key,
int  right_key 
) const
inline

Definition at line 324 of file colpartition.h.

324  {
325  return (right_key - left_key) / vertical_.y();
326  }
inT16 y() const
access_function
Definition: points.h:56

◆ left_key()

int tesseract::ColPartition::left_key ( ) const
inline

Definition at line 172 of file colpartition.h.

172  {
173  return left_key_;
174  }

◆ left_key_tab()

bool tesseract::ColPartition::left_key_tab ( ) const
inline

Definition at line 169 of file colpartition.h.

169  {
170  return left_key_tab_;
171  }

◆ left_margin()

int tesseract::ColPartition::left_margin ( ) const
inline

Definition at line 112 of file colpartition.h.

112  {
113  return left_margin_;
114  }

◆ LeftAtY()

int tesseract::ColPartition::LeftAtY ( int  y) const
inline

Definition at line 340 of file colpartition.h.

340  {
341  return XAtY(left_key_, y);
342  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320

◆ LeftBlobRule()

int tesseract::ColPartition::LeftBlobRule ( ) const

Definition at line 555 of file colpartition.cpp.

555  {
556  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
557  return it.data()->left_rule();
558 }

◆ LineSpacingBlocks()

void tesseract::ColPartition::LineSpacingBlocks ( const ICOORD & bleft,
const ICOORD & tright,
int  resolution,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts,
BLOCK_LIST *  completed_blocks,
TO_BLOCK_LIST *  to_blocks 
)
static

Definition at line 1414 of file colpartition.cpp.

1419  {
1420  int page_height = tright.y() - bleft.y();
1421  // Compute the initial spacing stats.
1422  ColPartition_IT it(block_parts);
1423  int part_count = 0;
1424  int max_line_height = 0;
1425 
1426  // TODO(joeliu): We should add some special logic for PT_INLINE_EQUATION type
1427  // because their line spacing with their neighbors maybe smaller and their
1428  // height may be slightly larger.
1429 
1430  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1431  ColPartition* part = it.data();
1432  ASSERT_HOST(!part->boxes()->empty());
1433  STATS side_steps(0, part->bounding_box().height());
1434  if (part->bounding_box().height() > max_line_height)
1435  max_line_height = part->bounding_box().height();
1436  BLOBNBOX_C_IT blob_it(part->boxes());
1437  int prev_bottom = blob_it.data()->bounding_box().bottom();
1438  for (blob_it.forward(); !blob_it.at_first(); blob_it.forward()) {
1439  BLOBNBOX* blob = blob_it.data();
1440  int bottom = blob->bounding_box().bottom();
1441  int step = bottom - prev_bottom;
1442  if (step < 0)
1443  step = -step;
1444  side_steps.add(step, 1);
1445  prev_bottom = bottom;
1446  }
1447  part->set_side_step(static_cast<int>(side_steps.median() + 0.5));
1448  if (!it.at_last()) {
1449  ColPartition* next_part = it.data_relative(1);
1450  part->set_bottom_spacing(part->median_bottom() -
1451  next_part->median_bottom());
1452  part->set_top_spacing(part->median_top() - next_part->median_top());
1453  } else {
1454  part->set_bottom_spacing(page_height);
1455  part->set_top_spacing(page_height);
1456  }
1457  if (textord_debug_tabfind) {
1458  part->Print();
1459  tprintf("side step = %.2f, top spacing = %d, bottom spacing=%d\n",
1460  side_steps.median(), part->top_spacing(), part->bottom_spacing());
1461  }
1462  ++part_count;
1463  }
1464  if (part_count == 0)
1465  return;
1466 
1467  SmoothSpacings(resolution, page_height, block_parts);
1468 
1469  // Move the partitions into individual block lists and make the blocks.
1470  BLOCK_IT block_it(completed_blocks);
1471  TO_BLOCK_IT to_block_it(to_blocks);
1472  ColPartition_LIST spacing_parts;
1473  ColPartition_IT sp_block_it(&spacing_parts);
1474  int same_block_threshold = max_line_height * kMaxSameBlockLineSpacing;
1475  for (it.mark_cycle_pt(); !it.empty();) {
1476  ColPartition* part = it.extract();
1477  sp_block_it.add_to_end(part);
1478  it.forward();
1479  if (it.empty() || part->bottom_spacing() > same_block_threshold ||
1480  !part->SpacingsEqual(*it.data(), resolution)) {
1481  // There is a spacing boundary. Check to see if it.data() belongs
1482  // better in the current block or the next one.
1483  if (!it.empty() && part->bottom_spacing() <= same_block_threshold) {
1484  ColPartition* next_part = it.data();
1485  // If there is a size match one-way, then the middle line goes with
1486  // its matched size, otherwise it goes with the smallest spacing.
1487  ColPartition* third_part = it.at_last() ? NULL : it.data_relative(1);
1488  if (textord_debug_tabfind) {
1489  tprintf("Spacings unequal: upper:%d/%d, lower:%d/%d,"
1490  " sizes %d %d %d\n",
1491  part->top_spacing(), part->bottom_spacing(),
1492  next_part->top_spacing(), next_part->bottom_spacing(),
1493  part->median_size(), next_part->median_size(),
1494  third_part != NULL ? third_part->median_size() : 0);
1495  }
1496  // We can only consider adding the next line to the block if the sizes
1497  // match and the lines are close enough for their size.
1498  if (part->SizesSimilar(*next_part) &&
1499  next_part->median_size() * kMaxSameBlockLineSpacing >
1500  part->bottom_spacing() &&
1501  part->median_size() * kMaxSameBlockLineSpacing >
1502  part->top_spacing()) {
1503  // Even now, we can only add it as long as the third line doesn't
1504  // match in the same way and have a smaller bottom spacing.
1505  if (third_part == NULL ||
1506  !next_part->SizesSimilar(*third_part) ||
1507  third_part->median_size() * kMaxSameBlockLineSpacing <=
1508  next_part->bottom_spacing() ||
1509  next_part->median_size() * kMaxSameBlockLineSpacing <=
1510  next_part->top_spacing() ||
1511  next_part->bottom_spacing() > part->bottom_spacing()) {
1512  // Add to the current block.
1513  sp_block_it.add_to_end(it.extract());
1514  it.forward();
1515  if (textord_debug_tabfind) {
1516  tprintf("Added line to current block.\n");
1517  }
1518  }
1519  }
1520  }
1521  TO_BLOCK* to_block = MakeBlock(bleft, tright, &spacing_parts, used_parts);
1522  if (to_block != NULL) {
1523  to_block_it.add_to_end(to_block);
1524  block_it.add_to_end(to_block->block);
1525  }
1526  sp_block_it.set_to_list(&spacing_parts);
1527  } else {
1528  if (textord_debug_tabfind && !it.empty()) {
1529  ColPartition* next_part = it.data();
1530  tprintf("Spacings equal: upper:%d/%d, lower:%d/%d, median:%d/%d\n",
1531  part->top_spacing(), part->bottom_spacing(),
1532  next_part->top_spacing(), next_part->bottom_spacing(),
1533  part->median_size(), next_part->median_size());
1534  }
1535  }
1536  }
1537 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
BLOCK * block
Definition: blobbox.h:773
inT16 bottom() const
Definition: rect.h:61
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31
const double kMaxSameBlockLineSpacing
static TO_BLOCK * MakeBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
Definition: statistc.h:33
#define ASSERT_HOST(x)
Definition: errcode.h:84
inT16 y() const
access_function
Definition: points.h:56

◆ lower_partners()

ColPartition_CLIST* tesseract::ColPartition::lower_partners ( )
inline

Definition at line 199 of file colpartition.h.

199  {
200  return &lower_partners_;
201  }

◆ MakeBigPartition()

ColPartition * tesseract::ColPartition::MakeBigPartition ( BLOBNBOX * box,
ColPartition_LIST *  big_part_list 
)
static

Definition at line 128 of file colpartition.cpp.

129  {
130  box->set_owner(NULL);
131  ColPartition* single = new ColPartition(BRT_UNKNOWN, ICOORD(0, 1));
132  single->set_flow(BTFT_NONE);
133  single->AddBox(box);
134  single->ComputeLimits();
135  single->ClaimBoxes();
136  single->SetBlobTypes();
137  single->set_block_owned(true);
138  if (big_part_list != NULL) {
139  ColPartition_IT part_it(big_part_list);
140  part_it.add_to_end(single);
141  }
142  return single;
143 }
integer coordinate
Definition: points.h:30
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340

◆ MakeBlock()

TO_BLOCK * tesseract::ColPartition::MakeBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1630 of file colpartition.cpp.

1632  {
1633  if (block_parts->empty())
1634  return NULL; // Nothing to do.
1635  // If the block_parts are not in reading order, then it will make an invalid
1636  // block polygon and bounding_box, so sort by bounding box now just to make
1637  // sure.
1638  block_parts->sort(&ColPartition::SortByBBox);
1639  ColPartition_IT it(block_parts);
1640  ColPartition* part = it.data();
1641  PolyBlockType type = part->type();
1642  if (type == PT_VERTICAL_TEXT)
1643  return MakeVerticalTextBlock(bleft, tright, block_parts, used_parts);
1644  // LineSpacingBlocks has handed us a collection of evenly spaced lines and
1645  // put the average spacing in each partition, so we can just take the
1646  // linespacing from the first partition.
1647  int line_spacing = part->bottom_spacing();
1648  if (line_spacing < part->median_size())
1649  line_spacing = part->bounding_box().height();
1650  ICOORDELT_LIST vertices;
1651  ICOORDELT_IT vert_it(&vertices);
1652  ICOORD start, end;
1653  int min_x = MAX_INT32;
1654  int max_x = -MAX_INT32;
1655  int min_y = MAX_INT32;
1656  int max_y = -MAX_INT32;
1657  int iteration = 0;
1658  do {
1659  if (iteration == 0)
1660  ColPartition::LeftEdgeRun(&it, &start, &end);
1661  else
1662  ColPartition::RightEdgeRun(&it, &start, &end);
1663  ClipCoord(bleft, tright, &start);
1664  ClipCoord(bleft, tright, &end);
1665  vert_it.add_after_then_move(new ICOORDELT(start));
1666  vert_it.add_after_then_move(new ICOORDELT(end));
1667  UpdateRange(start.x(), &min_x, &max_x);
1668  UpdateRange(end.x(), &min_x, &max_x);
1669  UpdateRange(start.y(), &min_y, &max_y);
1670  UpdateRange(end.y(), &min_y, &max_y);
1671  if ((iteration == 0 && it.at_first()) ||
1672  (iteration == 1 && it.at_last())) {
1673  ++iteration;
1674  it.move_to_last();
1675  }
1676  } while (iteration < 2);
1677  if (textord_debug_tabfind)
1678  tprintf("Making block at (%d,%d)->(%d,%d)\n",
1679  min_x, min_y, max_x, max_y);
1680  BLOCK* block = new BLOCK("", true, 0, 0, min_x, min_y, max_x, max_y);
1681  block->set_poly_block(new POLY_BLOCK(&vertices, type));
1682  return MoveBlobsToBlock(false, line_spacing, block, block_parts, used_parts);
1683 }
void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound)
Definition: helpers.h:125
static TO_BLOCK * MakeVerticalTextBlock(const ICOORD &bleft, const ICOORD &tright, ColPartition_LIST *block_parts, ColPartition_LIST *used_parts)
integer coordinate
Definition: points.h:30
PolyBlockType type() const
Definition: colpartition.h:181
static int SortByBBox(const void *p1, const void *p2)
Definition: colpartition.h:708
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
inT16 x() const
access function
Definition: points.h:52
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_INT32
Definition: host.h:53
Definition: ocrblock.h:30
PolyBlockType
Definition: publictypes.h:41
inT16 y() const
access_function
Definition: points.h:56

◆ MakeLinePartition()

ColPartition * tesseract::ColPartition::MakeLinePartition ( BlobRegionType  blob_type,
const ICOORD & vertical,
int  left,
int  bottom,
int  right,
int  top 
)
static

Constructs a fake ColPartition with no BLOBNBOXes to represent a horizontal or vertical line, given a type and a bounding box.

Definition at line 160 of file colpartition.cpp.

163  {
164  ColPartition* part = new ColPartition(blob_type, vertical);
165  part->bounding_box_ = TBOX(left, bottom, right, top);
166  part->median_bottom_ = bottom;
167  part->median_top_ = top;
168  part->median_size_ = top - bottom;
169  part->median_width_ = right - left;
170  part->left_key_ = part->BoxLeftKey();
171  part->right_key_ = part->BoxRightKey();
172  return part;
173 }
Definition: rect.h:30
BlobRegionType blob_type() const
Definition: colpartition.h:148

◆ MakeToRow()

TO_ROW * tesseract::ColPartition::MakeToRow ( )

Definition at line 1713 of file colpartition.cpp.

1713  {
1714  BLOBNBOX_C_IT blob_it(&boxes_);
1715  TO_ROW* row = NULL;
1716  int line_size = IsVerticalType() ? median_width_ : median_size_;
1717  // Add all the blobs to a single TO_ROW.
1718  for (; !blob_it.empty(); blob_it.forward()) {
1719  BLOBNBOX* blob = blob_it.extract();
1720 // blob->compute_bounding_box();
1721  int top = blob->bounding_box().top();
1722  int bottom = blob->bounding_box().bottom();
1723  if (row == NULL) {
1724  row = new TO_ROW(blob, static_cast<float>(top),
1725  static_cast<float>(bottom),
1726  static_cast<float>(line_size));
1727  } else {
1728  row->add_blob(blob, static_cast<float>(top),
1729  static_cast<float>(bottom),
1730  static_cast<float>(line_size));
1731  }
1732  }
1733  return row;
1734 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool IsVerticalType() const
Definition: colpartition.h:435
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:728
inT16 bottom() const
Definition: rect.h:61
inT16 top() const
Definition: rect.h:54

◆ MakeVerticalTextBlock()

TO_BLOCK * tesseract::ColPartition::MakeVerticalTextBlock ( const ICOORD & bleft,
const ICOORD & tright,
ColPartition_LIST *  block_parts,
ColPartition_LIST *  used_parts 
)
static

Definition at line 1687 of file colpartition.cpp.

1690  {
1691  if (block_parts->empty())
1692  return NULL; // Nothing to do.
1693  ColPartition_IT it(block_parts);
1694  ColPartition* part = it.data();
1695  TBOX block_box = part->bounding_box();
1696  int line_spacing = block_box.width();
1697  PolyBlockType type = it.data()->type();
1698  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1699  block_box += it.data()->bounding_box();
1700  }
1701  if (textord_debug_tabfind) {
1702  tprintf("Making block at:");
1703  block_box.print();
1704  }
1705  BLOCK* block = new BLOCK("", true, 0, 0, block_box.left(), block_box.bottom(),
1706  block_box.right(), block_box.top());
1707  block->set_poly_block(new POLY_BLOCK(block_box, type));
1708  return MoveBlobsToBlock(true, line_spacing, block, block_parts, used_parts);
1709 }
PolyBlockType type() const
Definition: colpartition.h:181
inT16 width() const
Definition: rect.h:111
inT16 bottom() const
Definition: rect.h:61
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
inT16 left() const
Definition: rect.h:68
void print() const
Definition: rect.h:270
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31
Definition: ocrblock.h:30
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
PolyBlockType
Definition: publictypes.h:41

◆ MarkAsLeaderIfMonospaced()

bool tesseract::ColPartition::MarkAsLeaderIfMonospaced ( )

Definition at line 1091 of file colpartition.cpp.

1091  {
1092  bool result = false;
1093  // Gather statistics on the gaps between blobs and the widths of the blobs.
1094  int part_width = bounding_box_.width();
1095  STATS gap_stats(0, part_width);
1096  STATS width_stats(0, part_width);
1097  BLOBNBOX_C_IT it(&boxes_);
1098  BLOBNBOX* prev_blob = it.data();
1099  prev_blob->set_flow(BTFT_NEIGHBOURS);
1100  width_stats.add(prev_blob->bounding_box().width(), 1);
1101  int blob_count = 1;
1102  for (it.forward(); !it.at_first(); it.forward()) {
1103  BLOBNBOX* blob = it.data();
1104  int left = blob->bounding_box().left();
1105  int right = blob->bounding_box().right();
1106  gap_stats.add(left - prev_blob->bounding_box().right(), 1);
1107  width_stats.add(right - left, 1);
1108  blob->set_flow(BTFT_NEIGHBOURS);
1109  prev_blob = blob;
1110  ++blob_count;
1111  }
1112  double median_gap = gap_stats.median();
1113  double median_width = width_stats.median();
1114  double max_width = MAX(median_gap, median_width);
1115  double min_width = MIN(median_gap, median_width);
1116  double gap_iqr = gap_stats.ile(0.75f) - gap_stats.ile(0.25f);
1117  if (textord_debug_tabfind >= 4) {
1118  tprintf("gap iqr = %g, blob_count=%d, limits=%g,%g\n",
1119  gap_iqr, blob_count, max_width * kMaxLeaderGapFractionOfMax,
1120  min_width * kMaxLeaderGapFractionOfMin);
1121  }
1122  if (gap_iqr < max_width * kMaxLeaderGapFractionOfMax &&
1123  gap_iqr < min_width * kMaxLeaderGapFractionOfMin &&
1124  blob_count >= kMinLeaderCount) {
1125  // This is stable enough to be called a leader, so check the widths.
1126  // Since leader dashes can join, run a dp cutting algorithm and go
1127  // on the cost.
1128  int offset = static_cast<int>(ceil(gap_iqr * 2));
1129  int min_step = static_cast<int>(median_gap + median_width + 0.5);
1130  int max_step = min_step + offset;
1131  min_step -= offset;
1132  // Pad the buffer with min_step/2 on each end.
1133  int part_left = bounding_box_.left() - min_step / 2;
1134  part_width += min_step;
1135  DPPoint* projection = new DPPoint[part_width];
1136  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1137  BLOBNBOX* blob = it.data();
1138  int left = blob->bounding_box().left();
1139  int right = blob->bounding_box().right();
1140  int height = blob->bounding_box().height();
1141  for (int x = left; x < right; ++x) {
1142  projection[left - part_left].AddLocalCost(height);
1143  }
1144  }
1145  DPPoint* best_end = DPPoint::Solve(min_step, max_step, false,
1146  &DPPoint::CostWithVariance,
1147  part_width, projection);
1148  if (best_end != NULL && best_end->total_cost() < blob_count) {
1149  // Good enough. Call it a leader.
1150  result = true;
1151  bool modified_blob_list = false;
1152  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1153  BLOBNBOX* blob = it.data();
1154  TBOX box = blob->bounding_box();
1155  // If the first or last blob is spaced too much, don't mark it.
1156  if (it.at_first()) {
1157  int gap = it.data_relative(1)->bounding_box().left() -
1158  blob->bounding_box().right();
1159  if (blob->bounding_box().width() + gap > max_step) {
1160  it.extract();
1161  modified_blob_list = true;
1162  continue;
1163  }
1164  }
1165  if (it.at_last()) {
1166  int gap = blob->bounding_box().left() -
1167  it.data_relative(-1)->bounding_box().right();
1168  if (blob->bounding_box().width() + gap > max_step) {
1169  it.extract();
1170  modified_blob_list = true;
1171  break;
1172  }
1173  }
1174  blob->set_region_type(BRT_TEXT);
1175  blob->set_flow(BTFT_LEADER);
1176  }
1177  if (modified_blob_list) ComputeLimits();
1178  blob_type_ = BRT_TEXT;
1179  flow_ = BTFT_LEADER;
1180  } else if (textord_debug_tabfind) {
1181  if (best_end == NULL) {
1182  tprintf("No path\n");
1183  } else {
1184  tprintf("Total cost = %d vs allowed %d\n", best_end->total_cost(),
1185  blob_count);
1186  }
1187  }
1188  delete [] projection;
1189  }
1190  return result;
1191 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT64 CostWithVariance(const DPPoint *prev)
Definition: dppoint.cpp:68
const int kMinLeaderCount
inT16 width() const
Definition: rect.h:111
#define MIN(x, y)
Definition: ndminx.h:28
const double kMaxLeaderGapFractionOfMin
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:283
const double kMaxLeaderGapFractionOfMax
static DPPoint * Solve(int min_step, int max_step, bool debug, CostFunc cost_func, int size, DPPoint *points)
Definition: dppoint.cpp:30
inT16 left() const
Definition: rect.h:68
inT16 height() const
Definition: rect.h:104
#define MAX(x, y)
Definition: ndminx.h:24
int textord_debug_tabfind
Definition: alignedblob.cpp:27
#define tprintf(...)
Definition: tprintf.h:31
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
Definition: statistc.h:33

◆ MatchingColumns()

bool tesseract::ColPartition::MatchingColumns ( const ColPartition & other) const

Definition at line 380 of file colpartition.cpp.

380  {
381  int y = (MidY() + other.MidY()) / 2;
382  if (!NearlyEqual(other.LeftAtY(y) / kColumnWidthFactor,
383  LeftAtY(y) / kColumnWidthFactor, 1))
384  return false;
385  if (!NearlyEqual(other.RightAtY(y) / kColumnWidthFactor,
386  RightAtY(y) / kColumnWidthFactor, 1))
387  return false;
388  return true;
389 }
int LeftAtY(int y) const
Definition: colpartition.h:340
bool NearlyEqual(T x, T y, T tolerance)
Definition: host.h:77
const int kColumnWidthFactor
Definition: tabfind.h:42
int RightAtY(int y) const
Definition: colpartition.h:344

◆ MatchingSizes()

bool tesseract::ColPartition::MatchingSizes ( const ColPartition & other) const

Definition at line 415 of file colpartition.cpp.

415  {
416  if (blob_type_ == BRT_VERT_TEXT || other.blob_type_ == BRT_VERT_TEXT)
417  return !TabFind::DifferentSizes(median_width_, other.median_width_);
418  else
419  return !TabFind::DifferentSizes(median_size_, other.median_size_);
420 }
static bool DifferentSizes(int size1, int size2)
Definition: tabfind.cpp:408

◆ MatchingStrokeWidth()

bool tesseract::ColPartition::MatchingStrokeWidth ( const ColPartition & other,
double  fractional_tolerance,
double  constant_tolerance 
) const

Definition at line 440 of file colpartition.cpp.

442  {
443  int match_count = 0;
444  int nonmatch_count = 0;
445  BLOBNBOX_C_IT box_it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
446  BLOBNBOX_C_IT other_it(const_cast<BLOBNBOX_CLIST*>(&other.boxes_));
447  box_it.mark_cycle_pt();
448  other_it.mark_cycle_pt();
449  while (!box_it.cycled_list() && !other_it.cycled_list()) {
450  if (box_it.data()->MatchingStrokeWidth(*other_it.data(),
451  fractional_tolerance,
452  constant_tolerance))
453  ++match_count;
454  else
455  ++nonmatch_count;
456  box_it.forward();
457  other_it.forward();
458  }
459  return match_count > nonmatch_count;
460 }

◆ MatchingTextColor()

bool tesseract::ColPartition::MatchingTextColor ( const ColPartition & other) const

Definition at line 392 of file colpartition.cpp.

392  {
393  if (color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise &&
394  other.color1_[L_ALPHA_CHANNEL] > kMaxRMSColorNoise)
395  return false; // Too noisy.
396 
397  // Colors must match for other to count.
398  double d_this1_o = ImageFind::ColorDistanceFromLine(other.color1_,
399  other.color2_,
400  color1_);
401  double d_this2_o = ImageFind::ColorDistanceFromLine(other.color1_,
402  other.color2_,
403  color2_);
404  double d_o1_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
405  other.color1_);
406  double d_o2_this = ImageFind::ColorDistanceFromLine(color1_, color2_,
407  other.color2_);
408 // All 4 distances must be small enough.
409  return d_this1_o < kMaxColorDistance && d_this2_o < kMaxColorDistance &&
410  d_o1_this < kMaxColorDistance && d_o2_this < kMaxColorDistance;
411 }
const int kMaxColorDistance
static double ColorDistanceFromLine(const uinT8 *line1, const uinT8 *line2, const uinT8 *point)
Definition: imagefind.cpp:341
const int kMaxRMSColorNoise

◆ median_bottom()

int tesseract::ColPartition::median_bottom ( ) const
inline

Definition at line 127 of file colpartition.h.

127  {
128  return median_bottom_;
129  }

◆ median_left()

int tesseract::ColPartition::median_left ( ) const
inline

Definition at line 130 of file colpartition.h.

130  {
131  return median_left_;
132  }

◆ median_right()

int tesseract::ColPartition::median_right ( ) const
inline

Definition at line 133 of file colpartition.h.

133  {
134  return median_right_;
135  }

◆ median_size()

int tesseract::ColPartition::median_size ( ) const
inline

Definition at line 136 of file colpartition.h.

136  {
137  return median_size_;
138  }

◆ median_top()

int tesseract::ColPartition::median_top ( ) const
inline

Definition at line 124 of file colpartition.h.

124  {
125  return median_top_;
126  }

◆ median_width()

int tesseract::ColPartition::median_width ( ) const
inline

Definition at line 142 of file colpartition.h.

142  {
143  return median_width_;
144  }

◆ MedianY()

int tesseract::ColPartition::MedianY ( ) const
inline

Definition at line 308 of file colpartition.h.

308  {
309  return (median_top_ + median_bottom_) / 2;
310  }

◆ MidX()

int tesseract::ColPartition::MidX ( ) const
inline

Definition at line 312 of file colpartition.h.

312  {
313  return (bounding_box_.left() + bounding_box_.right()) / 2;
314  }
inT16 left() const
Definition: rect.h:68
inT16 right() const
Definition: rect.h:75

◆ MidY()

int tesseract::ColPartition::MidY ( ) const
inline

Definition at line 304 of file colpartition.h.

304  {
305  return (bounding_box_.top() + bounding_box_.bottom()) / 2;
306  }
inT16 bottom() const
Definition: rect.h:61
inT16 top() const
Definition: rect.h:54

◆ nearest_neighbor_above()

ColPartition* tesseract::ColPartition::nearest_neighbor_above ( ) const
inline

Definition at line 249 of file colpartition.h.

249  {
250  return nearest_neighbor_above_;
251  }

◆ nearest_neighbor_below()

ColPartition* tesseract::ColPartition::nearest_neighbor_below ( ) const
inline

Definition at line 255 of file colpartition.h.

255  {
256  return nearest_neighbor_below_;
257  }

◆ OKDiacriticMerge()

bool tesseract::ColPartition::OKDiacriticMerge ( const ColPartition & candidate,
bool  debug 
) const

Definition at line 468 of file colpartition.cpp.

469  {
470  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
471  int min_top = MAX_INT32;
472  int max_bottom = -MAX_INT32;
473  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
474  BLOBNBOX* blob = it.data();
475  if (!blob->IsDiacritic()) {
476  if (debug) {
477  tprintf("Blob is not a diacritic:");
478  blob->bounding_box().print();
479  }
480  return false; // All blobs must have diacritic bases.
481  }
482  if (blob->base_char_top() < min_top)
483  min_top = blob->base_char_top();
484  if (blob->base_char_bottom() > max_bottom)
485  max_bottom = blob->base_char_bottom();
486  }
487  // If the intersection of all vertical ranges of all base characters
488  // overlaps the median range of this, then it is OK.
489  bool result = min_top > candidate.median_bottom_ &&
490  max_bottom < candidate.median_top_;
491  if (debug) {
492  if (result)
493  tprintf("OKDiacritic!\n");
494  else
495  tprintf("y ranges don\'t overlap: %d-%d / %d-%d\n",
496  max_bottom, min_top, median_bottom_, median_top_);
497  }
498  return result;
499 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
int base_char_top() const
Definition: blobbox.h:368
int base_char_bottom() const
Definition: blobbox.h:371
void print() const
Definition: rect.h:270
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_INT32
Definition: host.h:53
bool IsDiacritic() const
Definition: blobbox.h:365

◆ OKMergeOverlap()

bool tesseract::ColPartition::OKMergeOverlap ( const ColPartition & merge1,
const ColPartition & merge2,
int  ok_box_overlap,
bool  debug 
)

Definition at line 744 of file colpartition.cpp.

746  {
747  // Vertical partitions are not allowed to be involved.
748  if (IsVerticalType() || merge1.IsVerticalType() || merge2.IsVerticalType()) {
749  if (debug)
750  tprintf("Vertical partition\n");
751  return false;
752  }
753  // The merging partitions must strongly overlap each other.
754  if (!merge1.VSignificantCoreOverlap(merge2)) {
755  if (debug)
756  tprintf("Voverlap %d (%d)\n",
757  merge1.VCoreOverlap(merge2),
758  merge1.VSignificantCoreOverlap(merge2));
759  return false;
760  }
761  // The merged box must not overlap the median bounds of this.
762  TBOX merged_box(merge1.bounding_box());
763  merged_box += merge2.bounding_box();
764  if (merged_box.bottom() < median_top_ && merged_box.top() > median_bottom_ &&
765  merged_box.bottom() < bounding_box_.top() - ok_box_overlap &&
766  merged_box.top() > bounding_box_.bottom() + ok_box_overlap) {
767  if (debug)
768  tprintf("Excessive box overlap\n");
769  return false;
770  }
771  // Looks OK!
772  return true;
773 }
bool IsVerticalType() const
Definition: colpartition.h:435
inT16 bottom() const
Definition: rect.h:61
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30

◆ OverlapSplitBlob()

BLOBNBOX * tesseract::ColPartition::OverlapSplitBlob ( const TBOX & box)

Definition at line 777 of file colpartition.cpp.

777  {
778  if (boxes_.empty() || boxes_.singleton())
779  return NULL;
780  BLOBNBOX_C_IT it(&boxes_);
781  TBOX left_box(it.data()->bounding_box());
782  for (it.forward(); !it.at_first(); it.forward()) {
783  BLOBNBOX* bbox = it.data();
784  left_box += bbox->bounding_box();
785  if (left_box.overlap(box))
786  return bbox;
787  }
788  return NULL;
789 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
Definition: rect.h:30

◆ owns_blobs()

bool tesseract::ColPartition::owns_blobs ( ) const
inline

Definition at line 291 of file colpartition.h.

291  {
292  return owns_blobs_;
293  }

◆ PartitionType()

PolyBlockType tesseract::ColPartition::PartitionType ( ColumnSpanningType  flow) const

Definition at line 1014 of file colpartition.cpp.

1014  {
1015  if (flow == CST_NOISE) {
1016  if (blob_type_ != BRT_HLINE && blob_type_ != BRT_VLINE &&
1017  blob_type_ != BRT_RECTIMAGE && blob_type_ != BRT_VERT_TEXT)
1018  return PT_NOISE;
1019  flow = CST_FLOWING;
1020  }
1021 
1022  switch (blob_type_) {
1023  case BRT_NOISE:
1024  return PT_NOISE;
1025  case BRT_HLINE:
1026  return PT_HORZ_LINE;
1027  case BRT_VLINE:
1028  return PT_VERT_LINE;
1029  case BRT_RECTIMAGE:
1030  case BRT_POLYIMAGE:
1031  switch (flow) {
1032  case CST_FLOWING:
1033  return PT_FLOWING_IMAGE;
1034  case CST_HEADING:
1035  return PT_HEADING_IMAGE;
1036  case CST_PULLOUT:
1037  return PT_PULLOUT_IMAGE;
1038  default:
1039  ASSERT_HOST(!"Undefined flow type for image!");
1040  }
1041  break;
1042  case BRT_VERT_TEXT:
1043  return PT_VERTICAL_TEXT;
1044  case BRT_TEXT:
1045  case BRT_UNKNOWN:
1046  default:
1047  switch (flow) {
1048  case CST_FLOWING:
1049  return PT_FLOWING_TEXT;
1050  case CST_HEADING:
1051  return PT_HEADING_TEXT;
1052  case CST_PULLOUT:
1053  return PT_PULLOUT_TEXT;
1054  default:
1055  ASSERT_HOST(!"Undefined flow type for text!");
1056  }
1057  }
1058  ASSERT_HOST(!"Should never get here!");
1059  return PT_NOISE;
1060 }
Definition: capi.h:98
BlobTextFlowType flow() const
Definition: colpartition.h:154
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ Print()

void tesseract::ColPartition::Print ( ) const

Definition at line 1789 of file colpartition.cpp.

1789  {
1790  int y = MidY();
1791  tprintf("ColPart:%c(M%d-%c%d-B%d/%d,%d/%d)->(%dB-%d%c-%dM/%d,%d/%d)"
1792  " w-ok=%d, v-ok=%d, type=%d%c%d, fc=%d, lc=%d, boxes=%d"
1793  " ts=%d bs=%d ls=%d rs=%d\n",
1794  boxes_.empty() ? 'E' : ' ',
1795  left_margin_, left_key_tab_ ? 'T' : 'B', LeftAtY(y),
1796  bounding_box_.left(), median_left_,
1797  bounding_box_.bottom(), median_bottom_,
1798  bounding_box_.right(), RightAtY(y), right_key_tab_ ? 'T' : 'B',
1799  right_margin_, median_right_, bounding_box_.top(), median_top_,
1800  good_width_, good_column_, type_,
1801  kBlobTypes[blob_type_], flow_,
1802  first_column_, last_column_, boxes_.length(),
1803  space_above_, space_below_, space_to_left_, space_to_right_);
1804 }
int LeftAtY(int y) const
Definition: colpartition.h:340
inT16 bottom() const
Definition: rect.h:61
int RightAtY(int y) const
Definition: colpartition.h:344
inT16 left() const
Definition: rect.h:68
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
inT16 right() const
Definition: rect.h:75

◆ PrintColors()

void tesseract::ColPartition::PrintColors ( )

Definition at line 1807 of file colpartition.cpp.

1807  {
1808  tprintf("Colors:(%d, %d, %d)%d -> (%d, %d, %d)\n",
1809  color1_[COLOR_RED], color1_[COLOR_GREEN], color1_[COLOR_BLUE],
1810  color1_[L_ALPHA_CHANNEL],
1811  color2_[COLOR_RED], color2_[COLOR_GREEN], color2_[COLOR_BLUE]);
1812 }
#define tprintf(...)
Definition: tprintf.h:31

◆ RefinePartners()

void tesseract::ColPartition::RefinePartners ( PolyBlockType  type,
bool  get_desperate,
ColPartitionGrid grid 
)

Definition at line 1884 of file colpartition.cpp.

1885  {
1886  if (TypesSimilar(type_, type)) {
1887  RefinePartnersInternal(true, get_desperate, grid);
1888  RefinePartnersInternal(false, get_desperate, grid);
1889  } else if (type == PT_COUNT) {
1890  // This is the final pass. Make sure only the correctly typed
1891  // partners survive, however many there are.
1892  RefinePartnersByType(true, &upper_partners_);
1893  RefinePartnersByType(false, &lower_partners_);
1894  // It is possible for a merge to have given a partition multiple
1895  // partners again, so the last resort is to use overlap which is
1896  // guaranteed to leave at most one partner left.
1897  if (!upper_partners_.empty() && !upper_partners_.singleton())
1898  RefinePartnersByOverlap(true, &upper_partners_);
1899  if (!lower_partners_.empty() && !lower_partners_.singleton())
1900  RefinePartnersByOverlap(false, &lower_partners_);
1901  }
1902 }
PolyBlockType type() const
Definition: colpartition.h:181
Definition: capi.h:98
static bool TypesSimilar(PolyBlockType type1, PolyBlockType type2)
Definition: colpartition.h:412

◆ ReflectInYAxis()

void tesseract::ColPartition::ReflectInYAxis ( )

Definition at line 330 of file colpartition.cpp.

330  {
331  BLOBNBOX_CLIST reversed_boxes;
332  BLOBNBOX_C_IT reversed_it(&reversed_boxes);
333  // Reverse the order of the boxes_.
334  BLOBNBOX_C_IT bb_it(&boxes_);
335  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
336  reversed_it.add_before_then_move(bb_it.extract());
337  }
338  bb_it.add_list_after(&reversed_boxes);
339  ASSERT_HOST(!left_key_tab_ && !right_key_tab_);
340  int tmp = left_margin_;
341  left_margin_ = -right_margin_;
342  right_margin_ = -tmp;
343  ComputeLimits();
344 }
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ReleaseNonLeaderBoxes()

bool tesseract::ColPartition::ReleaseNonLeaderBoxes ( )

Definition at line 299 of file colpartition.cpp.

299  {
300  BLOBNBOX_C_IT bb_it(&boxes_);
301  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
302  BLOBNBOX* bblob = bb_it.data();
303  if (bblob->flow() != BTFT_LEADER) {
304  if (bblob->owner() == this) bblob->set_owner(NULL);
305  bb_it.extract();
306  }
307  }
308  if (bb_it.empty()) return false;
309  flow_ = BTFT_LEADER;
310  ComputeLimits();
311  return true;
312 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
BlobTextFlowType flow() const
Definition: blobbox.h:280
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340

◆ RemoveBox()

void tesseract::ColPartition::RemoveBox ( BLOBNBOX * box)

Definition at line 212 of file colpartition.cpp.

212  {
213  BLOBNBOX_C_IT bb_it(&boxes_);
214  for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) {
215  if (box == bb_it.data()) {
216  bb_it.extract();
217  ComputeLimits();
218  return;
219  }
220  }
221 }

◆ RemovePartner()

void tesseract::ColPartition::RemovePartner ( bool  upper,
ColPartition * partner 
)

Definition at line 628 of file colpartition.cpp.

628  {
629  ColPartition_C_IT it(upper ? &upper_partners_ : &lower_partners_);
630  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
631  if (it.data() == partner) {
632  it.extract();
633  break;
634  }
635  }
636 }

◆ right_key()

int tesseract::ColPartition::right_key ( ) const
inline

Definition at line 178 of file colpartition.h.

178  {
179  return right_key_;
180  }

◆ right_key_tab()

bool tesseract::ColPartition::right_key_tab ( ) const
inline

Definition at line 175 of file colpartition.h.

175  {
176  return right_key_tab_;
177  }

◆ right_margin()

int tesseract::ColPartition::right_margin ( ) const
inline

Definition at line 118 of file colpartition.h.

118  {
119  return right_margin_;
120  }

◆ RightAtY()

int tesseract::ColPartition::RightAtY ( int  y) const
inline

Definition at line 344 of file colpartition.h.

344  {
345  return XAtY(right_key_, y);
346  }
int XAtY(int sort_key, int y) const
Definition: colpartition.h:320

◆ RightBlobRule()

int tesseract::ColPartition::RightBlobRule ( ) const

Definition at line 560 of file colpartition.cpp.

560  {
561  BLOBNBOX_C_IT it(const_cast<BLOBNBOX_CLIST*>(&boxes_));
562  it.move_to_last();
563  return it.data()->right_rule();
564 }

◆ set_blob_type()

void tesseract::ColPartition::set_blob_type ( BlobRegionType  t)
inline

Definition at line 151 of file colpartition.h.

151  {
152  blob_type_ = t;
153  }

◆ set_block_owned()

void tesseract::ColPartition::set_block_owned ( bool  owned)
inline

Definition at line 208 of file colpartition.h.

208  {
209  block_owned_ = owned;
210  }

◆ set_bottom_spacing()

void tesseract::ColPartition::set_bottom_spacing ( int  spacing)
inline

Definition at line 223 of file colpartition.h.

223  {
224  bottom_spacing_ = spacing;
225  }

◆ set_first_column()

void tesseract::ColPartition::set_first_column ( int  column)
inline

Definition at line 727 of file colpartition.h.

727  {
728  first_column_ = column;
729  }

◆ set_flow()

void tesseract::ColPartition::set_flow ( BlobTextFlowType  f)
inline

Definition at line 157 of file colpartition.h.

157  {
158  flow_ = f;
159  }

◆ set_inside_table_column()

void tesseract::ColPartition::set_inside_table_column ( bool  val)
inline

Definition at line 246 of file colpartition.h.

246  {
247  inside_table_column_ = val;
248  }

◆ set_last_column()

void tesseract::ColPartition::set_last_column ( int  column)
inline

Definition at line 730 of file colpartition.h.

730  {
731  last_column_ = column;
732  }

◆ set_left_margin()

void tesseract::ColPartition::set_left_margin ( int  margin)
inline

Definition at line 115 of file colpartition.h.

115  {
116  left_margin_ = margin;
117  }

◆ set_median_size()

void tesseract::ColPartition::set_median_size ( int  size)
inline

Definition at line 139 of file colpartition.h.

139  {
140  median_size_ = size;
141  }

◆ set_median_width()

void tesseract::ColPartition::set_median_width ( int  width)
inline

Definition at line 145 of file colpartition.h.

145  {
146  median_width_ = width;
147  }

◆ set_nearest_neighbor_above()

void tesseract::ColPartition::set_nearest_neighbor_above ( ColPartition * part)
inline

Definition at line 252 of file colpartition.h.

252  {
253  nearest_neighbor_above_ = part;
254  }

◆ set_nearest_neighbor_below()

void tesseract::ColPartition::set_nearest_neighbor_below ( ColPartition * part)
inline

Definition at line 258 of file colpartition.h.

258  {
259  nearest_neighbor_below_ = part;
260  }

◆ set_owns_blobs()

void tesseract::ColPartition::set_owns_blobs ( bool  owns_blobs)
inline

Definition at line 294 of file colpartition.h.

294  {
295  // Do NOT change ownership flag when there are blobs in the list.
296  // Immediately set the ownership flag when creating copies.
297  ASSERT_HOST(boxes_.empty());
298  owns_blobs_ = owns_blobs;
299  }
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ set_right_margin()

void tesseract::ColPartition::set_right_margin ( int  margin)
inline

Definition at line 121 of file colpartition.h.

121  {
122  right_margin_ = margin;
123  }

◆ set_side_step()

void tesseract::ColPartition::set_side_step ( int  step)
inline

Definition at line 217 of file colpartition.h.

217  {
218  side_step_ = step;
219  }

◆ set_space_above()

void tesseract::ColPartition::set_space_above ( int  space)
inline

Definition at line 264 of file colpartition.h.

264  {
265  space_above_ = space;
266  }

◆ set_space_below()

void tesseract::ColPartition::set_space_below ( int  space)
inline

Definition at line 270 of file colpartition.h.

270  {
271  space_below_ = space;
272  }

◆ set_space_to_left()

void tesseract::ColPartition::set_space_to_left ( int  space)
inline

Definition at line 276 of file colpartition.h.

276  {
277  space_to_left_ = space;
278  }

◆ set_space_to_right()

void tesseract::ColPartition::set_space_to_right ( int  space)
inline

Definition at line 282 of file colpartition.h.

282  {
283  space_to_right_ = space;
284  }

◆ set_table_type()

void tesseract::ColPartition::set_table_type ( )
inline

Definition at line 233 of file colpartition.h.

233  {
234  if (type_ != PT_TABLE) {
235  type_before_table_ = type_;
236  type_ = PT_TABLE;
237  }
238  }
Definition: capi.h:97

◆ set_top_spacing()

void tesseract::ColPartition::set_top_spacing ( int  spacing)
inline

Definition at line 229 of file colpartition.h.

229  {
230  top_spacing_ = spacing;
231  }

◆ set_type()

void tesseract::ColPartition::set_type ( PolyBlockType  t)
inline

Definition at line 184 of file colpartition.h.

184  {
185  type_ = t;
186  }

◆ set_vertical()

void tesseract::ColPartition::set_vertical ( const ICOORD & v)
inline

Definition at line 193 of file colpartition.h.

193  {
194  vertical_ = v;
195  }

◆ set_working_set()

void tesseract::ColPartition::set_working_set ( WorkingPartSet * working_set)
inline

Definition at line 202 of file colpartition.h.

202  {
203  working_set_ = working_set;
204  }

◆ SetBlobTypes()

void tesseract::ColPartition::SetBlobTypes ( )

Definition at line 1274 of file colpartition.cpp.

1274  {
1275  if (!owns_blobs())
1276  return;
1277  BLOBNBOX_C_IT it(&boxes_);
1278  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1279  BLOBNBOX* blob = it.data();
1280  if (blob->flow() != BTFT_LEADER)
1281  blob->set_flow(flow_);
1282  blob->set_region_type(blob_type_);
1283  ASSERT_HOST(blob->owner() == NULL || blob->owner() == this);
1284  }
1285 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
BlobTextFlowType flow() const
Definition: blobbox.h:280
void set_flow(BlobTextFlowType value)
Definition: blobbox.h:283
void set_region_type(BlobRegionType new_type)
Definition: blobbox.h:271
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ SetColumnGoodness()

void tesseract::ColPartition::SetColumnGoodness ( WidthCallback * cb)

Definition at line 1078 of file colpartition.cpp.

1078  {
1079  int y = MidY();
1080  int width = RightAtY(y) - LeftAtY(y);
1081  good_width_ = cb->Run(width);
1082  good_column_ = blob_type_ == BRT_TEXT && left_key_tab_ && right_key_tab_;
1083 }
int LeftAtY(int y) const
Definition: colpartition.h:340
int RightAtY(int y) const
Definition: colpartition.h:344

◆ SetLeftTab()

void tesseract::ColPartition::SetLeftTab ( const TabVector * tab_vector)

Definition at line 504 of file colpartition.cpp.

504  {
505  if (tab_vector != NULL) {
506  left_key_ = tab_vector->sort_key();
507  left_key_tab_ = left_key_ <= BoxLeftKey();
508  } else {
509  left_key_tab_ = false;
510  }
511  if (!left_key_tab_)
512  left_key_ = BoxLeftKey();
513 }

◆ SetPartitionType()

void tesseract::ColPartition::SetPartitionType ( int  resolution,
ColPartitionSet * columns 
)

Definition at line 981 of file colpartition.cpp.

981  {
982  int first_spanned_col = -1;
983  ColumnSpanningType span_type =
984  columns->SpanningType(resolution,
985  bounding_box_.left(), bounding_box_.right(),
986  MIN(bounding_box_.height(), bounding_box_.width()),
987  MidY(), left_margin_, right_margin_,
988  &first_column_, &last_column_,
989  &first_spanned_col);
990  column_set_ = columns;
991  if (first_column_ < last_column_ && span_type == CST_PULLOUT &&
992  !IsLineType()) {
993  // Unequal columns may indicate that the pullout spans one of the columns
994  // it lies in, so force it to be allocated to just that column.
995  if (first_spanned_col >= 0) {
996  first_column_ = first_spanned_col;
997  last_column_ = first_spanned_col;
998  } else {
999  if ((first_column_ & 1) == 0)
1000  last_column_ = first_column_;
1001  else if ((last_column_ & 1) == 0)
1002  first_column_ = last_column_;
1003  else
1004  first_column_ = last_column_ = (first_column_ + last_column_) / 2;
1005  }
1006  }
1007  type_ = PartitionType(span_type);
1008 }
inT16 width() const
Definition: rect.h:111
#define MIN(x, y)
Definition: ndminx.h:28
PolyBlockType PartitionType(ColumnSpanningType flow) const
inT16 left() const
Definition: rect.h:68
inT16 height() const
Definition: rect.h:104
inT16 right() const
Definition: rect.h:75

◆ SetRegionAndFlowTypesFromProjectionValue()

void tesseract::ColPartition::SetRegionAndFlowTypesFromProjectionValue ( int  value)

Definition at line 1200 of file colpartition.cpp.

1200  {
1201  int blob_count = 0; // Total # blobs.
1202  int good_blob_score_ = 0; // Total # good strokewidth neighbours.
1203  int noisy_count = 0; // Total # neighbours marked as noise.
1204  int hline_count = 0;
1205  int vline_count = 0;
1206  BLOBNBOX_C_IT it(&boxes_);
1207  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1208  BLOBNBOX* blob = it.data();
1209  ++blob_count;
1210  noisy_count += blob->NoisyNeighbours();
1211  good_blob_score_ += blob->GoodTextBlob();
1212  if (blob->region_type() == BRT_HLINE) ++hline_count;
1213  if (blob->region_type() == BRT_VLINE) ++vline_count;
1214  }
1215  flow_ = BTFT_NEIGHBOURS;
1216  blob_type_ = BRT_UNKNOWN;
1217  if (hline_count > vline_count) {
1218  flow_ = BTFT_NONE;
1219  blob_type_ = BRT_HLINE;
1220  } else if (vline_count > hline_count) {
1221  flow_ = BTFT_NONE;
1222  blob_type_ = BRT_VLINE;
1223  } else if (value < -1 || 1 < value) {
1224  int long_side;
1225  int short_side;
1226  if (value > 0) {
1227  long_side = bounding_box_.width();
1228  short_side = bounding_box_.height();
1229  blob_type_ = BRT_TEXT;
1230  } else {
1231  long_side = bounding_box_.height();
1232  short_side = bounding_box_.width();
1233  blob_type_ = BRT_VERT_TEXT;
1234  }
1235  // We will combine the old metrics using aspect ratio and blob counts
1236  // with the input value by allowing a strong indication to flip the
1237  // STRONG_CHAIN/CHAIN flow values.
1238  int strong_score = blob_count >= kHorzStrongTextlineCount ? 1 : 0;
1239  if (short_side > kHorzStrongTextlineHeight) ++strong_score;
1240  if (short_side * kHorzStrongTextlineAspect < long_side) ++strong_score;
1241  if (abs(value) >= kMinStrongTextValue)
1242  flow_ = BTFT_STRONG_CHAIN;
1243  else if (abs(value) >= kMinChainTextValue)
1244  flow_ = BTFT_CHAIN;
1245  else
1246  flow_ = BTFT_NEIGHBOURS;
1247  // Upgrade chain to strong chain if the other indicators are good
1248  if (flow_ == BTFT_CHAIN && strong_score == 3)
1249  flow_ = BTFT_STRONG_CHAIN;
1250  // Downgrade strong vertical text to chain if the indicators are bad.
1251  if (flow_ == BTFT_STRONG_CHAIN && value < 0 && strong_score < 2)
1252  flow_ = BTFT_CHAIN;
1253  }
1254  if (flow_ == BTFT_NEIGHBOURS) {
1255  // Check for noisy neighbours.
1256  if (noisy_count >= blob_count) {
1257  flow_ = BTFT_NONTEXT;
1258  blob_type_= BRT_NOISE;
1259  }
1260  }
1261  if (TabFind::WithinTestRegion(2, bounding_box_.left(),
1262  bounding_box_.bottom())) {
1263  tprintf("RegionFlowTypesFromProjectionValue count=%d, noisy=%d, score=%d,",
1264  blob_count, noisy_count, good_blob_score_);
1265  tprintf(" Projection value=%d, flow=%d, blob_type=%d\n",
1266  value, flow_, blob_type_);
1267  Print();
1268  }
1269  SetBlobTypes();
1270 }
int GoodTextBlob() const
Definition: blobbox.cpp:221
static bool WithinTestRegion(int detail_level, int x, int y)
const int kMinStrongTextValue
const int kHorzStrongTextlineCount
inT16 width() const
Definition: rect.h:111
inT16 bottom() const
Definition: rect.h:61
int NoisyNeighbours() const
Definition: blobbox.cpp:232
inT16 left() const
Definition: rect.h:68
inT16 height() const
Definition: rect.h:104
const int kHorzStrongTextlineAspect
#define tprintf(...)
Definition: tprintf.h:31
const int kMinChainTextValue
BlobRegionType region_type() const
Definition: blobbox.h:268
const int kHorzStrongTextlineHeight

◆ SetRightTab()

void tesseract::ColPartition::SetRightTab ( const TabVector * tab_vector)

Definition at line 516 of file colpartition.cpp.

516  {
517  if (tab_vector != NULL) {
518  right_key_ = tab_vector->sort_key();
519  right_key_tab_ = right_key_ >= BoxRightKey();
520  } else {
521  right_key_tab_ = false;
522  }
523  if (!right_key_tab_)
524  right_key_ = BoxRightKey();
525 }

◆ SetSpecialBlobsDensity()

void tesseract::ColPartition::SetSpecialBlobsDensity ( const BlobSpecialTextType  type,
const float  density 
)

Definition at line 586 of file colpartition.cpp.

587  {
589  special_blobs_densities_[type] = density;
590 }
PolyBlockType type() const
Definition: colpartition.h:181
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ ShallowCopy()

ColPartition * tesseract::ColPartition::ShallowCopy ( ) const

Definition at line 1738 of file colpartition.cpp.

1738  {
1739  ColPartition* part = new ColPartition(blob_type_, vertical_);
1740  part->left_margin_ = left_margin_;
1741  part->right_margin_ = right_margin_;
1742  part->bounding_box_ = bounding_box_;
1743  memcpy(part->special_blobs_densities_, special_blobs_densities_,
1744  sizeof(special_blobs_densities_));
1745  part->median_bottom_ = median_bottom_;
1746  part->median_top_ = median_top_;
1747  part->median_size_ = median_size_;
1748  part->median_left_ = median_left_;
1749  part->median_right_ = median_right_;
1750  part->median_width_ = median_width_;
1751  part->good_width_ = good_width_;
1752  part->good_column_ = good_column_;
1753  part->left_key_tab_ = left_key_tab_;
1754  part->right_key_tab_ = right_key_tab_;
1755  part->type_ = type_;
1756  part->flow_ = flow_;
1757  part->left_key_ = left_key_;
1758  part->right_key_ = right_key_;
1759  part->first_column_ = first_column_;
1760  part->last_column_ = last_column_;
1761  part->owns_blobs_ = false;
1762  return part;
1763 }

◆ SingletonPartner()

ColPartition * tesseract::ColPartition::SingletonPartner ( bool  upper)

Definition at line 639 of file colpartition.cpp.

639  {
640  ColPartition_CLIST* partners = upper ? &upper_partners_ : &lower_partners_;
641  if (!partners->singleton())
642  return NULL;
643  ColPartition_C_IT it(partners);
644  return it.data();
645 }

◆ SmoothPartnerRun()

void tesseract::ColPartition::SmoothPartnerRun ( int  working_set_count)

Definition at line 1815 of file colpartition.cpp.

1815  {
1816  STATS left_stats(0, working_set_count);
1817  STATS right_stats(0, working_set_count);
1818  PolyBlockType max_type = type_;
1819  ColPartition* partner;
1820  for (partner = SingletonPartner(false); partner != NULL;
1821  partner = partner->SingletonPartner(false)) {
1822  if (partner->type_ > max_type)
1823  max_type = partner->type_;
1824  if (column_set_ == partner->column_set_) {
1825  left_stats.add(partner->first_column_, 1);
1826  right_stats.add(partner->last_column_, 1);
1827  }
1828  }
1829  type_ = max_type;
1830  // TODO(rays) Either establish that it isn't necessary to set the columns,
1831  // or find a way to do it that does not cause an assert failure in
1832  // AddToWorkingSet.
1833 #if 0
1834  first_column_ = left_stats.mode();
1835  last_column_ = right_stats.mode();
1836  if (last_column_ < first_column_)
1837  last_column_ = first_column_;
1838 #endif
1839 
1840  for (partner = SingletonPartner(false); partner != NULL;
1841  partner = partner->SingletonPartner(false)) {
1842  partner->type_ = max_type;
1843 #if 0 // See TODO above
1844  if (column_set_ == partner->column_set_) {
1845  partner->first_column_ = first_column_;
1846  partner->last_column_ = last_column_;
1847  }
1848 #endif
1849  }
1850 }
ColPartition * SingletonPartner(bool upper)
PolyBlockType
Definition: publictypes.h:41
Definition: statistc.h:33

◆ SortByBBox()

static int tesseract::ColPartition::SortByBBox ( const void *  p1,
const void *  p2 
)
inline static

Definition at line 708 of file colpartition.h.

708  {
709  const ColPartition* part1 =
710  *reinterpret_cast<const ColPartition* const*>(p1);
711  const ColPartition* part2 =
712  *reinterpret_cast<const ColPartition* const*>(p2);
713  int mid_y1 = part1->bounding_box_.y_middle();
714  int mid_y2 = part2->bounding_box_.y_middle();
715  if ((part2->bounding_box_.bottom() <= mid_y1 &&
716  mid_y1 <= part2->bounding_box_.top()) ||
717  (part1->bounding_box_.bottom() <= mid_y2 &&
718  mid_y2 <= part1->bounding_box_.top())) {
719  // Sort by increasing x.
720  return part1->bounding_box_.x_middle() - part2->bounding_box_.x_middle();
721  }
722  // Sort by decreasing y.
723  return mid_y2 - mid_y1;
724  }

◆ SortKey()

int tesseract::ColPartition::SortKey ( int  x,
int  y 
) const
inline

Definition at line 316 of file colpartition.h.

316  {
317  return TabVector::SortKey(vertical_, x, y);
318  }
static int SortKey(const ICOORD &vertical, int x, int y)
Definition: tabvector.h:280

◆ space_above()

int tesseract::ColPartition::space_above ( ) const
inline

Definition at line 261 of file colpartition.h.

261  {
262  return space_above_;
263  }

◆ space_below()

int tesseract::ColPartition::space_below ( ) const
inline

Definition at line 267 of file colpartition.h.

267  {
268  return space_below_;
269  }

◆ space_to_left()

int tesseract::ColPartition::space_to_left ( ) const
inline

Definition at line 273 of file colpartition.h.

273  {
274  return space_to_left_;
275  }

◆ space_to_right()

int tesseract::ColPartition::space_to_right ( ) const
inline

Definition at line 279 of file colpartition.h.

279  {
280  return space_to_right_;
281  }

◆ SpecialBlobsCount()

int tesseract::ColPartition::SpecialBlobsCount ( const BlobSpecialTextType  type)

Definition at line 571 of file colpartition.cpp.

571  {
572  ASSERT_HOST(type < BSTT_COUNT);
573  BLOBNBOX_C_IT blob_it(&boxes_);
574  int count = 0;
575  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
576  BLOBNBOX* blob = blob_it.data();
577  BlobSpecialTextType blob_type = blob->special_text_type();
578  if (blob_type == type) {
579  count++;
580  }
581  }
582 
583  return count;
584 }
int count(LIST var_list)
Definition: oldlist.cpp:103
PolyBlockType type() const
Definition: colpartition.h:181
BlobSpecialTextType special_text_type() const
Definition: blobbox.h:274
BlobRegionType blob_type() const
Definition: colpartition.h:148
BlobSpecialTextType
Definition: blobbox.h:81
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ SpecialBlobsDensity()

float tesseract::ColPartition::SpecialBlobsDensity ( const BlobSpecialTextType  type) const

Definition at line 566 of file colpartition.cpp.

566  {
567  ASSERT_HOST(type < BSTT_COUNT);
568  return special_blobs_densities_[type];
569 }
PolyBlockType type() const
Definition: colpartition.h:181
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ SplitAt()

ColPartition * tesseract::ColPartition::SplitAt ( int  split_x)

Definition at line 831 of file colpartition.cpp.

831  {
832  if (split_x <= bounding_box_.left() || split_x >= bounding_box_.right())
833  return NULL; // There will be no change.
834  ColPartition* split_part = ShallowCopy();
835  split_part->set_owns_blobs(owns_blobs());
836  BLOBNBOX_C_IT it(&boxes_);
837  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
838  BLOBNBOX* bbox = it.data();
839  ColPartition* prev_owner = bbox->owner();
840  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
841  const TBOX& box = bbox->bounding_box();
842  if (box.left() >= split_x) {
843  split_part->AddBox(it.extract());
844  if (owns_blobs() && prev_owner != NULL)
845  bbox->set_owner(split_part);
846  }
847  }
848  if (it.empty()) {
849  // Possible if split-x passes through the first blob.
850  it.add_list_after(&split_part->boxes_);
851  }
852  ASSERT_HOST(!it.empty());
853  if (split_part->IsEmpty()) {
854  // Split part ended up with nothing. Possible if split_x passes
855  // through the last blob.
856  delete split_part;
857  return NULL;
858  }
859  right_key_tab_ = false;
860  split_part->left_key_tab_ = false;
861  right_margin_ = split_x;
862  split_part->left_margin_ = split_x;
863  ComputeLimits();
864  split_part->ComputeLimits();
865  return split_part;
866 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 left() const
Definition: rect.h:68
ColPartition * ShallowCopy() const
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ SplitAtBlob()

ColPartition * tesseract::ColPartition::SplitAtBlob ( BLOBNBOX * split_blob)

Definition at line 795 of file colpartition.cpp.

795  {
796  ColPartition* split_part = ShallowCopy();
797  split_part->set_owns_blobs(owns_blobs());
798  BLOBNBOX_C_IT it(&boxes_);
799  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
800  BLOBNBOX* bbox = it.data();
801  ColPartition* prev_owner = bbox->owner();
802  ASSERT_HOST(!owns_blobs() || prev_owner == this || prev_owner == NULL);
803  if (bbox == split_blob || !split_part->boxes_.empty()) {
804  split_part->AddBox(it.extract());
805  if (owns_blobs() && prev_owner != NULL)
806  bbox->set_owner(split_part);
807  }
808  }
809  ASSERT_HOST(!it.empty());
810  if (split_part->IsEmpty()) {
811  // Split part ended up with nothing. Possible if split_blob is not
812  // in the list of blobs.
813  delete split_part;
814  return NULL;
815  }
816  right_key_tab_ = false;
817  split_part->left_key_tab_ = false;
818  ComputeLimits();
819  // TODO(nbeato) Merge Ray's CL like this:
820  // if (owns_blobs())
821  // SetBlobTextlineGoodness();
822  split_part->ComputeLimits();
823  // TODO(nbeato) Merge Ray's CL like this:
824  // if (split_part->owns_blobs())
825  // split_part->SetBlobTextlineGoodness();
826  return split_part;
827 }
tesseract::ColPartition * owner() const
Definition: blobbox.h:337
ColPartition * ShallowCopy() const
void set_owner(tesseract::ColPartition *new_owner)
Definition: blobbox.h:340
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ top_spacing()

int tesseract::ColPartition::top_spacing ( ) const
inline

Definition at line 226 of file colpartition.h.

226  {
227  return top_spacing_;
228  }

◆ type()

PolyBlockType tesseract::ColPartition::type ( ) const
inline

Definition at line 181 of file colpartition.h.

181  {
182  return type_;
183  }

◆ TypesMatch() [1/2]

bool tesseract::ColPartition::TypesMatch ( const ColPartition & other) const
inline

Definition at line 403 of file colpartition.h.

403  {
404  return TypesMatch(blob_type_, other.blob_type_);
405  }
bool TypesMatch(const ColPartition &other) const
Definition: colpartition.h:403

◆ TypesMatch() [2/2]

static bool tesseract::ColPartition::TypesMatch ( BlobRegionType  type1,
BlobRegionType  type2 
)
inline static

Definition at line 406 of file colpartition.h.

406  {
407  return (type1 == type2 || type1 == BRT_UNKNOWN || type2 == BRT_UNKNOWN) &&
408  !BLOBNBOX::IsLineType(type1) && !BLOBNBOX::IsLineType(type2);
409  }
static bool IsLineType(BlobRegionType type)
Definition: blobbox.h:411

◆ TypesSimilar()

static bool tesseract::ColPartition::TypesSimilar ( PolyBlockType  type1,
PolyBlockType  type2 
)
inline static

Definition at line 412 of file colpartition.h.

412  {
413  return (type1 == type2 ||
414  (type1 == PT_FLOWING_TEXT && type2 == PT_INLINE_EQUATION) ||
415  (type2 == PT_FLOWING_TEXT && type1 == PT_INLINE_EQUATION));
416  }

◆ upper_partners()

ColPartition_CLIST* tesseract::ColPartition::upper_partners ( )
inline

Definition at line 196 of file colpartition.h.

196  {
197  return &upper_partners_;
198  }

◆ VCoreOverlap()

int tesseract::ColPartition::VCoreOverlap ( const ColPartition & other) const
inline

Definition at line 375 of file colpartition.h.

375  {
376  return MIN(median_top_, other.median_top_) -
377  MAX(median_bottom_, other.median_bottom_);
378  }
#define MIN(x, y)
Definition: ndminx.h:28
#define MAX(x, y)
Definition: ndminx.h:24

◆ VOverlaps()

bool tesseract::ColPartition::VOverlaps ( const ColPartition & other) const
inline

Definition at line 370 of file colpartition.h.

370  {
371  return bounding_box_.y_gap(other.bounding_box_) < 0;
372  }
int y_gap(const TBOX &box) const
Definition: rect.h:225

◆ VSignificantCoreOverlap()

bool tesseract::ColPartition::VSignificantCoreOverlap ( const ColPartition & other) const
inline

Definition at line 387 of file colpartition.h.

387  {
388  int overlap = VCoreOverlap(other);
389  int height = MIN(median_top_ - median_bottom_,
390  other.median_top_ - other.median_bottom_);
391  return overlap * 3 > height;
392  }
#define MIN(x, y)
Definition: ndminx.h:28
int VCoreOverlap(const ColPartition &other) const
Definition: colpartition.h:375

◆ WithinSameMargins()

bool tesseract::ColPartition::WithinSameMargins ( const ColPartition & other) const
inline

Definition at line 395 of file colpartition.h.

395  {
396  return left_margin_ <= other.bounding_box_.left() &&
397  bounding_box_.left() >= other.left_margin_ &&
398  bounding_box_.right() <= other.right_margin_ &&
399  right_margin_ >= other.bounding_box_.right();
400  }
inT16 left() const
Definition: rect.h:68
inT16 right() const
Definition: rect.h:75

◆ XAtY()

int tesseract::ColPartition::XAtY ( int  sort_key,
int  y 
) const
inline

Definition at line 320 of file colpartition.h.

320  {
321  return TabVector::XAtY(vertical_, sort_key, y);
322  }
int XAtY(int y) const
Definition: tabvector.h:189

The documentation for this class was generated from the following files: