tesseract  3.05.02
baselinedetect.h
Go to the documentation of this file.
1 // File: baselinedetect.h
3 // Description: Initial Baseline Determination.
4 // Copyright 2012 Google Inc. All Rights Reserved.
5 // Author: rays@google.com (Ray Smith)
6 // Created: Mon Apr 30 10:03:19 PDT 2012
7 //
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 #ifndef TESSERACT_TEXTORD_BASELINEDETECT_H_
21 #define TESSERACT_TEXTORD_BASELINEDETECT_H_
22 
23 #include "detlinefit.h"
24 #include "genericvector.h"
25 #include "points.h"
26 #include "rect.h"
27 #include "strngs.h"
28 
29 class BLOBNBOX_LIST;
30 class TO_BLOCK;
31 class TO_BLOCK_LIST;
32 class TO_ROW;
33 struct Pix;
34 
35 namespace tesseract {
36 
37 class Textord;
38 
39 // Class to compute and hold baseline data for a TO_ROW.
40 class BaselineRow {
41  public:
42  BaselineRow(double line_size, TO_ROW* to_row);
43 
44  const TBOX& bounding_box() const {
45  return bounding_box_;
46  }
47  // Sets the TO_ROW with the output straight line.
48  void SetupOldLineParameters(TO_ROW* row) const;
49 
50  // Outputs diagnostic information.
51  void Print() const;
52 
53  // Returns the skew angle (in radians) of the current baseline in [-pi,pi].
54  double BaselineAngle() const;
55  // Computes and returns the linespacing at the middle of the overlap
56  // between this and other.
57  double SpaceBetween(const BaselineRow& other) const;
58  // Computes and returns the displacement of the center of the line
59  // perpendicular to the given direction.
60  double PerpDisp(const FCOORD& direction) const;
61  // Computes the y coordinate at the given x using the straight baseline
62  // defined by baseline_pt1_ and baseline_pt2_.
63  double StraightYAtX(double x) const;
64 
65  // Fits a straight baseline to the points. Returns true if it had enough
66  // points to be reasonably sure of the fitted baseline.
67  // If use_box_bottoms is false, baseline positions are formed by
68  // considering the outlines of the blobs.
69  bool FitBaseline(bool use_box_bottoms);
70  // Modifies an existing result of FitBaseline to be parallel to the given
71  // vector if that produces a better result.
72  void AdjustBaselineToParallel(int debug, const FCOORD& direction);
73  // Modifies the baseline to snap to the textline grid if the existing
74  // result is not good enough.
75  double AdjustBaselineToGrid(int debug, const FCOORD& direction,
76  double line_spacing, double line_offset);
77 
78  private:
79  // Sets up displacement_modes_ with the top few modes of the perpendicular
80  // distance of each blob from the given direction vector, after rounding.
81  void SetupBlobDisplacements(const FCOORD& direction);
82 
83  // Fits a line in the given direction to blobs that are close to the given
84  // target_offset perpendicular displacement from the direction. The fit
85  // error is allowed to be cheat_allowance worse than the existing fit, and
86  // will still be used.
87  // If cheat_allowance > 0, the new fit will be good and replace the current
88  // fit if it has better fit (with cheat) OR its error is below
89  // max_baseline_error_ and the old fit is marked bad.
90  // Otherwise the new fit will only replace the old if it is really better,
91  // or the old fit is marked bad and the new fit has sufficient points, as
92  // well as being within the max_baseline_error_.
93  void FitConstrainedIfBetter(int debug, const FCOORD& direction,
94  double cheat_allowance,
95  double target_offset);
96  // Returns the perpendicular distance of the point from the straight
97  // baseline.
98  double PerpDistanceFromBaseline(const FCOORD& pt) const;
99  // Computes the bounding box of the row.
100  void ComputeBoundingBox();
101 
102  // The blobs of the row to which this BaselineRow adds extra information
103  // during baseline fitting. Note that blobs_ could easily come from either
104  // a TO_ROW or a ColPartition.
105  BLOBNBOX_LIST* blobs_;
106  // Bounding box of all the blobs.
107  TBOX bounding_box_;
108  // Fitter used to fit lines to the blobs.
109  DetLineFit fitter_;
110  // 2 points on the straight baseline.
111  FCOORD baseline_pt1_;
112  FCOORD baseline_pt2_;
113  // Set of modes of displacements. They indicate preferable baseline positions.
114  GenericVector<double> displacement_modes_;
115  // Quantization factor used for displacement_modes_.
116  double disp_quant_factor_;
117  // Half the acceptance range of blob displacements for computing the
118  // error during a constrained fit.
119  double fit_halfrange_;
120  // Max baseline error before a line is regarded as fitting badly.
121  double max_baseline_error_;
122  // The error of fit of the baseline.
123  double baseline_error_;
124  // True if this row seems to have a good baseline.
125  bool good_baseline_;
126 };
127 
128 // Class to compute and hold baseline data for a TO_BLOCK.
130  public:
131  BaselineBlock(int debug_level, bool non_text, TO_BLOCK* block);
132 
133  TO_BLOCK* block() const {
134  return block_;
135  }
136  double skew_angle() const {
137  return skew_angle_;
138  }
139 
140  // Computes and returns the absolute error of the given perp_disp from the
141  // given linespacing model.
142  static double SpacingModelError(double perp_disp, double line_spacing,
143  double line_offset);
144 
145  // Fits straight line baselines and computes the skew angle from the
146  // median angle. Returns true if a good angle is found.
147  // If use_box_bottoms is false, baseline positions are formed by
148  // considering the outlines of the blobs.
149  bool FitBaselinesAndFindSkew(bool use_box_bottoms);
150 
151  // Refits the baseline to a constrained angle, using the stored block
152  // skew if good enough, otherwise the supplied default skew.
153  void ParallelizeBaselines(double default_block_skew);
154 
155  // Sets the parameters in TO_BLOCK that are needed by subsequent processes.
156  void SetupBlockParameters() const;
157 
158  // Processing that is required before fitting baseline splines, but requires
159  // linear baselines in order to be successful:
160  // Removes noise if required
161  // Separates out underlines
162  // Pre-associates blob fragments.
163  // TODO(rays/joeliu) This entire section of code is inherited from the past
164  // and could be improved/eliminated.
165  // page_tr is used to size a debug window.
166  void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise);
167 
168  // Fits splines to the textlines, or creates fake QSPLINES from the straight
169  // baselines that are already on the TO_ROWs.
170  // As a side-effect, computes the xheights of the rows and the block.
171  // Although x-height estimation is conceptually separate, it is part of
172  // detecting perspective distortion and therefore baseline fitting.
173  void FitBaselineSplines(bool enable_splines, bool show_final_rows,
174  Textord* textord);
175 
176  // Draws the (straight) baselines and final blobs colored according to
177  // what was discarded as noise and what is associated with each row.
178  void DrawFinalRows(const ICOORD& page_tr);
179 
180  // Render the generated spline baselines for this block on pix_in.
181  void DrawPixSpline(Pix* pix_in);
182 
183  private:
184  // Top-level line-spacing calculation. Computes an estimate of the line-
185  // spacing, using the current baselines in the TO_ROWS of the block, and
186  // then refines it by fitting a regression line to the baseline positions
187  // as a function of their integer index.
188  // Returns true if it seems that the model is a reasonable fit to the
189  // observations.
190  bool ComputeLineSpacing();
191 
192  // Computes the deskewed vertical position of each baseline in the block and
193  // stores them in the given vector.
194  void ComputeBaselinePositions(const FCOORD& direction,
195  GenericVector<double>* positions);
196 
197  // Computes an estimate of the line spacing of the block from the median
198  // of the spacings between adjacent overlapping textlines.
199  void EstimateLineSpacing();
200 
201  // Refines the line spacing of the block by fitting a regression
202  // line to the deskewed y-position of each baseline as a function of its
203  // estimated line index, allowing for a small error in the initial linespacing
204  // and choosing the best available model.
205  void RefineLineSpacing(const GenericVector<double>& positions);
206 
207  // Given an initial estimate of line spacing (m_in) and the positions of each
208  // baseline, computes the line spacing of the block more accurately in m_out,
209  // and the corresponding intercept in c_out, and the number of spacings seen
210  // in index_delta. Returns the error of fit to the line spacing model.
211  double FitLineSpacingModel(const GenericVector<double>& positions,
212  double m_in, double* m_out, double* c_out,
213  int* index_delta);
214 
215 
216  // The block to which this class adds extra information used during baseline
217  // calculation.
218  TO_BLOCK* block_;
219  // The rows in the block that we will be working with.
221  // Amount of debugging output to provide.
222  int debug_level_;
223  // True if the block is non-text (graphic).
224  bool non_text_block_;
225  // True if the block has at least one good enough baseline to compute the
226  // skew angle and therefore skew_angle_ is valid.
227  bool good_skew_angle_;
228  // Angle of skew in radians using the conventional anticlockwise from x-axis.
229  double skew_angle_;
230  // Current best estimate line spacing in pixels perpendicular to skew_angle_.
231  double line_spacing_;
232  // Offset for baseline positions, in pixels. Each baseline is at
233  // line_spacing_ * n + line_offset_ for integer n, which represents
234  // [textline] line number in a line numbering system that has line 0 on or
235  // at least near the x-axis. Not equal to the actual line number of a line
236  // within a block as most blocks are not near the x-axis.
237  double line_offset_;
238  // The error of the line spacing model.
239  double model_error_;
240 };
241 
242 class BaselineDetect {
243  public:
244  BaselineDetect(int debug_level, const FCOORD& page_skew,
245  TO_BLOCK_LIST* blocks);
246 
247  ~BaselineDetect();
248 
249  // Finds the initial baselines for each TO_ROW in each TO_BLOCK, gathers
250  // block-wise and page-wise data to smooth small blocks/rows, and applies
251  // smoothing based on block/page-level skew and block-level linespacing.
252  void ComputeStraightBaselines(bool use_box_bottoms);
253 
254  // Computes the baseline splines for each TO_ROW in each TO_BLOCK and
255  // other associated side-effects, including pre-associating blobs, computing
256  // x-heights and displaying debug information.
257  // NOTE that ComputeStraightBaselines must have been called first as this
258  // sets up data in the TO_ROWs upon which this function depends.
259  void ComputeBaselineSplinesAndXheights(const ICOORD& page_tr,
260  bool enable_splines,
261  bool remove_noise,
262  bool show_final_rows,
263  Textord* textord);
264 
265  // Set up the image and filename, so that a debug image with the detected
266  // baseline rendered will be saved.
267  void SetDebugImage(Pix* pixIn, const STRING& output_path);
268 
269  private:
270  // Average (median) skew of the blocks on the page among those that have
271  // a good angle of their own.
272  FCOORD page_skew_;
273  // Amount of debug output to produce.
274  int debug_level_;
275  // The blocks that we are working with.
276  PointerVector<BaselineBlock> blocks_;
277 
278  Pix* pix_debug_;  // Debug image; presumably the one given to SetDebugImage - TODO confirm.
279  STRING debug_file_prefix_;  // Presumably derived from SetDebugImage's output_path - TODO confirm.
280 };
281 
282 } // namespace tesseract
283 
284 #endif // TESSERACT_TEXTORD_BASELINEDETECT_H_
void ComputeStraightBaselines(bool use_box_bottoms)
bool FitBaseline(bool use_box_bottoms)
integer coordinate
Definition: points.h:30
BaselineDetect(int debug_level, const FCOORD &page_skew, TO_BLOCK_LIST *blocks)
TO_BLOCK * block() const
double AdjustBaselineToGrid(int debug, const FCOORD &direction, double line_spacing, double line_offset)
void ParallelizeBaselines(double default_block_skew)
static double SpacingModelError(double perp_disp, double line_spacing, double line_offset)
void FitBaselineSplines(bool enable_splines, bool show_final_rows, Textord *textord)
void DrawFinalRows(const ICOORD &page_tr)
const TBOX & bounding_box() const
void PrepareForSplineFitting(ICOORD page_tr, bool remove_noise)
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
Definition: strngs.h:44
double PerpDisp(const FCOORD &direction) const
Definition: points.h:189
double StraightYAtX(double x) const
void SetupOldLineParameters(TO_ROW *row) const
BaselineRow(double line_size, TO_ROW *to_row)
void DrawPixSpline(Pix *pix_in)
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void SetDebugImage(Pix *pixIn, const STRING &output_path)
BaselineBlock(int debug_level, bool non_text, TO_BLOCK *block)
double SpaceBetween(const BaselineRow &other) const
bool FitBaselinesAndFindSkew(bool use_box_bottoms)
Definition: rect.h:30
void AdjustBaselineToParallel(int debug, const FCOORD &direction)
double BaselineAngle() const