tesseract  3.05.02
strokewidth.h
Go to the documentation of this file.
1 // File: strokewidth.h
3 // Description: Subclass of BBGrid to find uniformity of strokewidth.
4 // Author: Ray Smith
5 // Created: Mon Mar 31 16:17:01 PST 2008
6 //
7 // (C) Copyright 2008, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H__
21 #define TESSERACT_TEXTORD_STROKEWIDTH_H__
22 
23 #include "blobbox.h" // BlobNeighbourDir.
24 #include "blobgrid.h" // Base class.
25 #include "colpartitiongrid.h"
26 #include "textlineprojection.h"
27 
28 class DENORM;
29 class ScrollView;
30 class TO_BLOCK;
31 
32 namespace tesseract {
33 
34 class ColPartition_LIST;
35 class TabFind;
36 class TextlineProjection;
37 
38 // Misc enums to clarify bool arguments for direction-controlling args.
42 };
43 
44 // Return value from FindInitialPartitions indicates detection of severe
45 // skew or noise.
46 enum PartitionFindResult {
47  PFR_OK, // Everything is OK.
48  PFR_SKEW, // Skew was detected and rotated.
49  PFR_NOISE // Noise was detected and removed.
50 };
51 
57 class StrokeWidth : public BlobGrid {
58  public:
59  StrokeWidth(int gridsize, const ICOORD& bleft, const ICOORD& tright);
60  virtual ~StrokeWidth();
61 
62  // Sets the neighbours member of the medium-sized blobs in the block.
63  // Searches on 4 sides of each blob for similar-sized, similar-strokewidth
64  // blobs and sets pointers to the good neighbours.
65  void SetNeighboursOnMediumBlobs(TO_BLOCK* block);
66 
67  // Sets the neighbour/textline writing direction members of the medium
68  // and large blobs with optional repair of broken CJK characters first.
69  // Repair of broken CJK is needed here because broken CJK characters
70  // can fool the textline direction detection algorithm.
71  void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode,
72  bool cjk_merge,
73  TO_BLOCK* input_block);
74 
75  // To save computation, the process of generating partitions is broken
76  // into the following 4 steps:
77  // TestVerticalTextDirection
78  // CorrectForRotation (used only if a rotation is to be applied)
79  // FindLeaderPartitions
80  // GradeBlobsIntoPartitions.
81  // These functions are all required, in sequence, except for
82  // CorrectForRotation, which is not needed if no rotation is applied.
83 
84  // Types all the blobs as vertical or horizontal text or unknown and
85  // returns true if the majority are vertical.
86  // If the blobs are rotated, it is necessary to call CorrectForRotation
87  // after rotating everything, otherwise the work done here will be enough.
88  // If osd_blobs is not null, a list of blobs from the dominant textline
89  // direction are returned for use in orientation and script detection.
90  // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.
91  bool TestVerticalTextDirection(double find_vertical_text_ratio,
92  TO_BLOCK* block,
93  BLOBNBOX_CLIST* osd_blobs);
94 
95  // Corrects the data structures for the given rotation.
96  void CorrectForRotation(const FCOORD& rerotation,
97  ColPartitionGrid* part_grid);
98 
99  // Finds leader partitions and inserts them into the given grid.
100  void FindLeaderPartitions(TO_BLOCK* block,
101  ColPartitionGrid* part_grid);
102 
103  // Finds and marks as noise those blobs that look like bits of vertical lines
104  // that would otherwise screw up layout analysis.
105  void RemoveLineResidue(ColPartition_LIST* big_part_list);
106 
107  // Types all the blobs as vertical text or horizontal text or unknown and
108  // puts them into initial ColPartitions in the supplied part_grid.
109  // rerotation determines how to get back to the image coordinates from the
110  // blob coordinates (since they may have been rotated for vertical text).
111  // block is the single block for the whole page or rectangle to be OCRed.
112  // nontext_pix (full-size), is a binary mask used to prevent merges across
113  // photo/text boundaries. It is not kept beyond this function.
114  // denorm provides a mapping back to the image from the current blob
115  // coordinate space.
116  // projection provides a measure of textline density over the image and
117  // provides functions to assist with diacritic detection. It should be a
118  // pointer to a new TextlineProjection, and will be setup here.
119  // part_grid is the output grid of textline partitions.
120  // Large blobs that cause overlap are put in separate partitions and added
121  // to the big_parts list.
122  void GradeBlobsIntoPartitions(PageSegMode pageseg_mode,
123  const FCOORD& rerotation, TO_BLOCK* block,
124  Pix* nontext_pix, const DENORM* denorm,
125  bool cjk_script, TextlineProjection* projection,
126  BLOBNBOX_LIST* diacritic_blobs,
127  ColPartitionGrid* part_grid,
128  ColPartition_LIST* big_parts);
129 
130  // Handles a click event in a display window.
131  virtual void HandleClick(int x, int y);
132 
133  private:
134  // Computes the noise_density_ by summing the number of elements in a
135  // neighbourhood of each grid cell.
136  void ComputeNoiseDensity(TO_BLOCK* block, TabFind* line_grid);
137 
138  // Detects and marks leader dots/dashes.
139  // Leaders are horizontal chains of small or noise blobs that look
140  // monospace according to ColPartition::MarkAsLeaderIfMonospaced().
141  // Detected leaders become the only occupants of the block->small_blobs list.
142  // Non-leader small blobs get moved to the blobs list.
143  // Non-leader noise blobs remain singletons in the noise list.
144  // All small and noise blobs in high density regions are marked BTFT_NONTEXT.
145  // block is the single block for the whole page or rectangle to be OCRed.
146  // leader_parts is the output.
147  void FindLeadersAndMarkNoise(TO_BLOCK* block,
148  ColPartition_LIST* leader_parts);
149 
152  void InsertBlobs(TO_BLOCK* block);
153 
154  // Fix broken CJK characters, using the fake joined blobs mechanism.
155  // Blobs are really merged, ie the master takes all the outlines and the
156  // others are deleted.
157  // Returns true if sufficient blobs are merged that it may be worth running
158  // again, due to a better estimate of character size.
159  bool FixBrokenCJK(TO_BLOCK* block);
160 
161  // Collect blobs that overlap or are within max_dist of the input bbox.
162  // Return them in the list of blobs and expand the bbox to be the union
163  // of all the boxes. not_this is excluded from the search, as are blobs
164  // that cause the merged box to exceed max_size in either dimension.
165  void AccumulateOverlaps(const BLOBNBOX* not_this, bool debug,
166  int max_size, int max_dist,
167  TBOX* bbox, BLOBNBOX_CLIST* blobs);
168 
169  // For each blob in this grid, finds the textline direction to be horizontal
170  // or vertical according to distance to neighbours and 1st and 2nd order
171  // neighbours. Non-text tends to end up without a definite direction.
172  // Result is setting of the neighbours and vert_possible/horz_possible
173  // flags in the BLOBNBOXes currently in this grid.
174  // This function is called more than once if page orientation is uncertain,
175  // so display_if_debugging is true on the final call to display the results.
176  void FindTextlineFlowDirection(PageSegMode pageseg_mode,
177  bool display_if_debugging);
178 
179  // Sets the neighbours and good_stroke_neighbours members of the blob by
180  // searching close on all 4 sides.
181  // When finding leader dots/dashes, there is a slightly different rule for
182  // what makes a good neighbour.
183  // If activate_line_trap, then line-like objects are found and isolated.
184  void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX* blob);
185 
186  // Sets the good_stroke_neighbours member of the blob if it has a
187  // GoodNeighbour on the given side.
188  // Also sets the neighbour in the blob, whether or not a good one is found.
189  // Return value is the number of neighbours in the line trap size range.
190  // Leaders get extra special lenient treatment.
191  int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX* blob);
192 
193  // Restricts the blob to be only horizontal or vertical where evidence
194  // is clear based on gaps of 2nd order neighbours.
195  void SetNeighbourFlows(BLOBNBOX* blob);
196 
197  // Nullify the neighbours in the wrong directions where the direction
198  // is clear-cut based on a distance margin. Good for isolating vertical
199  // text from neighbouring horizontal text.
200  void SimplifyObviousNeighbours(BLOBNBOX* blob);
201 
202  // Smoothes the vertical/horizontal type of the blob based on the
203  // 2nd-order neighbours. If desperate is true, then all blobs are
204  // changed. Otherwise, only ambiguous blobs are processed.
205  void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate,
206  BLOBNBOX* blob);
207 
208  // Checks the left or right side of the given leader partition and sets the
209  // (opposite) leader_on_right or leader_on_left flags for blobs
210  // that are next to the given side of the given leader partition.
211  void MarkLeaderNeighbours(const ColPartition* part, LeftOrRight side);
212 
213  // Partition creation. Accumulates vertical and horizontal text chains,
214  // puts the remaining blobs in as unknowns, and then merges/splits to
215  // minimize overlap and smoothes the types with neighbours and the color
216  // image if provided. rerotation is used to rotate the coordinate space
217  // back to the nontext_map_ image.
218  // If find_problems is true, detects possible noise pollution by the amount
219  // of partition overlap that is created by the diacritics. If excessive, the
220  // noise is separated out into diacritic blobs, and PFR_NOISE is returned.
221  // [TODO(rays): if the partition overlap is caused by heavy skew, deskews
222  // the components, saves the skew_angle and returns PFR_SKEW.] If the return
223  // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be
224  // called again after cleaning up the partly done work.
225  PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode,
226  const FCOORD& rerotation,
227  bool find_problems, TO_BLOCK* block,
228  BLOBNBOX_LIST* diacritic_blobs,
229  ColPartitionGrid* part_grid,
230  ColPartition_LIST* big_parts,
231  FCOORD* skew_angle);
232  // Detects noise by a significant increase in partition overlap from
233  // pre_overlap to now, and removes noise from the union of all the overlapping
234  // partitions, placing the blobs in diacritic_blobs. Returns true if any noise
235  // was found and removed.
236  bool DetectAndRemoveNoise(int pre_overlap, const TBOX& grid_box,
237  TO_BLOCK* block, ColPartitionGrid* part_grid,
238  BLOBNBOX_LIST* diacritic_blobs);
239  // Finds vertical chains of text-like blobs and puts them in ColPartitions.
240  void FindVerticalTextChains(ColPartitionGrid* part_grid);
241  // Finds horizontal chains of text-like blobs and puts them in ColPartitions.
242  void FindHorizontalTextChains(ColPartitionGrid* part_grid);
243  // Finds diacritics and saves their base character in the blob.
244  void TestDiacritics(ColPartitionGrid* part_grid, TO_BLOCK* block);
245  // Searches this grid for an appropriately close and sized neighbour of the
246  // given [small] blob. If such a blob is found, the diacritic base is saved
247  // in the blob and true is returned.
248  // The small_grid is a secondary grid that contains the small/noise objects
249  // that are not in this grid, but may be useful for determining a connection
250  // between blob and its potential base character. (See DiacriticXGapFilled.)
251  bool DiacriticBlob(BlobGrid* small_grid, BLOBNBOX* blob);
252  // Returns true if there is no gap between the base char and the diacritic
253  // bigger than a fraction of the height of the base char:
254  // Eg: line end.....'
255  // The quote is a long way from the end of the line, yet it needs to be a
256  // diacritic. To determine that the quote is not part of an image, or
257  // a different text block, we check for other marks in the gap between
258  // the base char and the diacritic.
259  // '<--Diacritic
260  // |---------|
261  // | |<-toobig-gap->
262  // | Base |<ok gap>
263  // |---------| x<-----Dot occupying gap
264  // The grid is const really.
265  bool DiacriticXGapFilled(BlobGrid* grid, const TBOX& diacritic_box,
266  const TBOX& base_box);
267  // Merges diacritics with the ColPartition of the base character blob.
268  void MergeDiacritics(TO_BLOCK* block, ColPartitionGrid* part_grid);
269  // Any blobs on the large_blobs list of block that are still unowned by a
270  // ColPartition, are probably drop-cap or vertically touching so the blobs
271  // are removed to the big_parts list and treated separately.
272  void RemoveLargeUnusedBlobs(TO_BLOCK* block,
273  ColPartitionGrid* part_grid,
274  ColPartition_LIST* big_parts);
275 
276  // All remaining unused blobs are put in individual ColPartitions.
277  void PartitionRemainingBlobs(PageSegMode pageseg_mode,
278  ColPartitionGrid* part_grid);
279 
280  // If combine, put all blobs in the cell_list into a single partition,
281  // otherwise put each one into its own partition.
282  void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,
283  ColPartitionGrid* part_grid,
284  BLOBNBOX_CLIST* cell_list);
285 
286  // Helper function to finish setting up a ColPartition and insert into
287  // part_grid.
288  void CompletePartition(PageSegMode pageseg_mode, ColPartition* part,
289  ColPartitionGrid* part_grid);
290 
291  // Helper returns true if we are looking only for vertical textlines,
292  // taking into account any rotation that has been done (see rerotation_).
293  bool FindingVerticalOnly(PageSegMode pageseg_mode) const {
294  if (rerotation_.y() == 0.0f) {  // Zero y: presumably unrotated - TODO confirm.
295  return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;
296  }
297  return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&  // Nonzero y: same test that
298  pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;  // FindingHorizontalOnly uses for zero y.
299  }
300  // Helper returns true if we are looking only for horizontal textlines,
301  // taking into account any rotation that has been done (see rerotation_).
302  bool FindingHorizontalOnly(PageSegMode pageseg_mode) const {
303  if (rerotation_.y() == 0.0f) {  // Zero y: presumably unrotated - TODO confirm.
304  return !PSM_ORIENTATION_ENABLED(pageseg_mode) &&  // Orientation detection off and
305  pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;  // not the explicit vertical-text mode.
306  }
307  return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;  // Nonzero y: same test FindingVerticalOnly uses for zero y.
308  }
309 
310  // Merge partitions where the merge appears harmless.
311  void EasyMerges(ColPartitionGrid* part_grid);
312 
313  // Compute a search box based on the orientation of the partition.
314  // Returns true if a suitable box can be calculated.
315  // Callback for EasyMerges.
316  bool OrientationSearchBox(ColPartition* part, TBOX* box);
317 
318  // Merge confirmation callback for EasyMerges.
319  bool ConfirmEasyMerge(const ColPartition* p1, const ColPartition* p2);
320 
321  // Returns true if there is no significant noise in between the boxes.
322  bool NoNoiseInBetween(const TBOX& box1, const TBOX& box2) const;
323 
324  // Displays the blobs colored according to the number of good neighbours
325  // and the vertical/horizontal flow.
326  ScrollView* DisplayGoodBlobs(const char* window_name, int x, int y);
327 
328  // Displays blobs colored according to whether or not they are diacritics.
329  ScrollView* DisplayDiacritics(const char* window_name,
330  int x, int y, TO_BLOCK* block);
331 
332  private:
333  // Image map of photo/noise areas on the page. Borrowed pointer (not owned.)
334  Pix* nontext_map_;
335  // Textline projection map. Borrowed pointer.
336  TextlineProjection* projection_;
337  // DENORM used by projection_ to get back to image coords. Borrowed pointer.
338  const DENORM* denorm_;
339  // Bounding box of the grid.
340  TBOX grid_box_;
341  // Rerotation to get back to the original image.
342  FCOORD rerotation_;
343  // Windows for debug display.
344  ScrollView* leaders_win_;
345  ScrollView* initial_widths_win_;
346  ScrollView* widths_win_;
347  ScrollView* chains_win_;
348  ScrollView* diacritics_win_;
349  ScrollView* textlines_win_;
350  ScrollView* smoothed_win_;
351 };
352 
353 } // namespace tesseract.
354 
355 #endif // TESSERACT_TEXTORD_STROKEWIDTH_H__
const ICOORD & tright() const
Definition: bbgrid.h:75
integer coordinate
Definition: points.h:30
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
PartitionFindResult
Definition: strokewidth.h:46
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void RemoveLineResidue(ColPartition_LIST *big_part_list)
bool PSM_ORIENTATION_ENABLED(int pageseg_mode)
Definition: publictypes.h:182
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
virtual void HandleClick(int x, int y)
float y() const
Definition: points.h:212
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
BlobNeighbourDir
Definition: blobbox.h:72
int gridsize() const
Definition: bbgrid.h:63
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Definition: points.h:189
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
const ICOORD & bleft() const
Definition: bbgrid.h:72
Definition: rect.h:30