tesseract  3.05.02
textlineprojection.h
Go to the documentation of this file.
1 // Copyright 2011 Google Inc. All Rights Reserved.
2 // Author: rays@google.com (Ray Smith)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifndef TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
15 #define TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
16 
17 #include "blobgrid.h" // For BlobGrid
18 
19 class DENORM;  // Forward declaration: only used below as const DENORM* parameters.
20 struct Pix;  // Forward declaration: only used below as Pix* (nontext_map, pix_).
21 struct TPOINT;  // Forward declaration: used below only in function declarations.
22 
23 namespace tesseract {
24 
25 class ColPartition;  // Forward declaration: only taken by const reference below.
26 
27 // Simple class to encapsulate the computation of an image representing
28 // local textline density, and function(s) to make use of it.
29 // The underlying principle is that if you smear connected components
30 // horizontally (vertically for components on a vertically written textline)
31 // and count the number of smeared components in an image, then the resulting
32 // image shows the density of the textlines at each image position.
34  public:
35  // The down-scaling factor is computed to obtain a projection resolution
36  // of about 100 dpi, whatever the input.
37  explicit TextlineProjection(int resolution);
39 
40  // Build the projection profile given the input_block containing lists of
41  // blobs, a rotation to convert to image coords,
42  // and a full-resolution nontext_map, marking out areas to avoid.
43  // During construction, we have the following assumptions:
44  // The rotation is a multiple of 90 degrees, ie no deskew yet.
45  // The blobs have had their left and right rules set to also limit
46  // the range of projection.
47  void ConstructProjection(TO_BLOCK* input_block,
48  const FCOORD& rotation, Pix* nontext_map);
49 
50  // Display the blobs in the window colored according to textline quality.
51  void PlotGradedBlobs(BLOBNBOX_LIST* blobs, ScrollView* win);
52 
53  // Moves blobs that look like they don't sit well on a textline from the
54  // input blobs list to the output small_blobs list.
55  // This gets them away from initial textline finding to stop diacritics
56  // from forming incorrect textlines. (Introduced mainly to fix Thai.)
57  void MoveNonTextlineBlobs(BLOBNBOX_LIST* blobs,
58  BLOBNBOX_LIST* small_blobs) const;
59 
60  // Create a window and display the projection in it.
61  void DisplayProjection() const;
62 
63  // Compute the distance of the box from the partition using curved projection
64  // space. As DistanceOfBoxFromBox, except that the direction is taken from
65  // the ColPartition and the median bounds of the ColPartition are used as
66  // the to_box.
67  int DistanceOfBoxFromPartition(const TBOX& box, const ColPartition& part,
68  const DENORM* denorm, bool debug) const;
69 
70  // Compute the distance from the from_box to the to_box using curved
71  // projection space. Separation that involves a decrease in projection
72  // density (moving from the from_box to the to_box) is weighted more heavily
73  // than constant density, and an increase is weighted less.
74  // If horizontal_textline is true, then curved space is used vertically,
75  // as for a diacritic on the edge of a textline.
76  // The projection uses original image coords, so denorm is used to get
77  // back to the image coords from box/part space.
78  int DistanceOfBoxFromBox(const TBOX& from_box, const TBOX& to_box,
79  bool horizontal_textline,
80  const DENORM* denorm, bool debug) const;
81 
82  // Compute the distance between (x, y1) and (x, y2) using the rule that
83  // a decrease in textline density is weighted more heavily than an increase.
84  // The coordinates are in source image space, ie processed by any denorm
85  // already, but not yet scaled by scale_factor_.
86  // Going from the outside of a textline to the inside should measure much
87  // less distance than going from the inside of a textline to the outside.
88  int VerticalDistance(bool debug, int x, int y1, int y2) const;
89 
90  // Compute the distance between (x1, y) and (x2, y) using the rule that
91  // a decrease in textline density is weighted more heavily than an increase.
92  int HorizontalDistance(bool debug, int x1, int x2, int y) const;
93 
94  // Returns true if the blob appears to be outside of a horizontal textline.
95  // Such blobs are potentially diacritics (even if large in Thai) and should
96  // be kept away from initial textline finding.
97  bool BoxOutOfHTextline(const TBOX& box, const DENORM* denorm,
98  bool debug) const;
99 
100  // Evaluates the textlineiness of a ColPartition. Uses EvaluateBox below,
101  // but uses the median top/bottom for horizontal and median left/right for
102  // vertical instead of the bounding box edges.
103  // Evaluates for both horizontal and vertical and returns the best result,
104  // with a positive value for horizontal and a negative value for vertical.
105  int EvaluateColPartition(const ColPartition& part, const DENORM* denorm,
106  bool debug) const;
107 
108  // Computes the mean projection gradients over the horizontal and vertical
109  // edges of the box:
110  // -h-h-h-h-h-h
111  // |------------| mean=htop -v|+v--------+v|-v
112  // |+h+h+h+h+h+h| -v|+v +v|-v
113  // | | -v|+v +v|-v
114  // | box | -v|+v box +v|-v
115  // | | -v|+v +v|-v
116  // |+h+h+h+h+h+h| -v|+v +v|-v
117  // |------------| mean=hbot -v|+v--------+v|-v
118  // -h-h-h-h-h-h
119  // mean=vleft mean=vright
120  //
121  // Returns MAX(htop,hbot) - MAX(vleft,vright), which is a positive number
122  // for a horizontal textline, a negative number for a vertical textline,
123  // and near zero for undecided. Undecided is most likely non-text.
124  int EvaluateBox(const TBOX& box, const DENORM* denorm, bool debug) const;
125 
126  private:
127  // Internal version of EvaluateBox returns the unclipped gradients as well
128  // as the result of EvaluateBox.
129  // hgrad1 and hgrad2 are the gradients for the horizontal textline.
130  int EvaluateBoxInternal(const TBOX& box, const DENORM* denorm, bool debug,
131  int* hgrad1, int* hgrad2,
132  int* vgrad1, int* vgrad2) const;
133 
134  // Helper returns the mean gradient value for the horizontal row at the given
135  // y, (in the external coordinates) by subtracting the mean of the transformed
136  // row 2 pixels above from the mean of the transformed row 2 pixels below.
137  // This gives a positive value for a good top edge and negative for bottom.
138  // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
139  int BestMeanGradientInRow(const DENORM* denorm, inT16 min_x, inT16 max_x,
140  inT16 y, bool best_is_max) const;
141 
142  // Helper returns the mean gradient value for the vertical column at the
143  // given x, (in the external coordinates) by subtracting the mean of the
144  // transformed column 2 pixels left from the mean of the transformed column
145  // 2 pixels to the right.
146  // This gives a positive value for a good left edge and negative for right.
147  // Returns the best result out of +2/-2, +3/-1, +1/-3 pixels from the edge.
148  int BestMeanGradientInColumn(const DENORM* denorm, inT16 x, inT16 min_y,
149  inT16 max_y, bool best_is_max) const;
150 
151  // Helper returns the mean pixel value over the line between the start_pt and
152  // end_pt (inclusive), but shifted perpendicular to the line in the projection
153  // image by offset pixels. For simplicity, it is assumed that the vector is
154  // either nearly horizontal or nearly vertical. It works on skewed textlines!
155  // The end points are in external coordinates, and will be denormalized with
156  // the denorm if not NULL before further conversion to pix coordinates.
157  // After all the conversions, the offset is added to the direction
158  // perpendicular to the line direction. The offset is thus in projection image
159  // coordinates, which allows the caller to get a guaranteed displacement
160  // between pixels used to calculate gradients.
161  int MeanPixelsInLineSegment(const DENORM* denorm, int offset,
162  TPOINT start_pt, TPOINT end_pt) const;
163 
164  // Helper function to add 1 to a rectangle in source image coords to the
165  // internal projection pix_.
166  void IncrementRectangle8Bit(const TBOX& box);
167  // Inserts a list of blobs into the projection.
168  // Rotation is a multiple of 90 degrees to get from blob coords to
169  // nontext_map coords, image_box is the bounds of the nontext_map.
170  // Blobs are spread horizontally or vertically according to their internal
171  // flags, but the spreading is truncated by set pixels in the nontext_map
172  // and also by the horizontal rule line limits on the blobs.
173  void ProjectBlobs(BLOBNBOX_LIST* blobs, const FCOORD& rotation,
174  const TBOX& image_box, Pix* nontext_map);
175  // Pads the bounding box of the given blob according to whether it is on
176  // a horizontal or vertical text line, taking into account tab-stops near
177  // the blob. Returns true if padding was in the horizontal direction.
178  bool PadBlobBox(BLOBNBOX* blob, TBOX* bbox);
179 
180  // Helper denormalizes the TPOINT with the denorm if not NULL, then
181  // converts to pix_ coordinates.
182  void TransformToPixCoords(const DENORM* denorm, TPOINT* pt) const;
183 
184  // Helper truncates the TPOINT to be within the pix_.
185  void TruncateToImageBounds(TPOINT* pt) const;
186 
187  // Transform tesseract coordinates to coordinates used in the pix.
188  int ImageXToProjectionX(int x) const;
189  int ImageYToProjectionY(int y) const;
190 
191  // The down-sampling scale factor used in building the image.
192  int scale_factor_;
193  // The blob coordinates of the top-left (origin of the pix_) in tesseract
194  // coordinates. Used to transform the bottom-up tesseract coordinates to
195  // the top-down coordinates of the pix.
196  int x_origin_;
197  int y_origin_;
198  // The image of horizontally smeared blob boxes summed to provide a
199  // textline density map. As with a horizontal projection, the map has
200  // dips in the gaps between textlines.
201  Pix* pix_;
202 };
203 
204 } // namespace tesseract.
205 
206 #endif // TESSERACT_TEXTORD_TEXTLINEPROJECTION_H_
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
short inT16
Definition: host.h:33
int HorizontalDistance(bool debug, int x1, int x2, int y) const
Definition: blobs.h:50
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
Definition: points.h:189
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
int VerticalDistance(bool debug, int x, int y1, int y2) const
int EvaluateBox(const TBOX &box, const DENORM *denorm, bool debug) const
bool BoxOutOfHTextline(const TBOX &box, const DENORM *denorm, bool debug) const
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
Definition: rect.h:30
int DistanceOfBoxFromPartition(const TBOX &box, const ColPartition &part, const DENORM *denorm, bool debug) const
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const