tesseract  3.05.02
textord.cpp
Go to the documentation of this file.
1 // File: textord.cpp
3 // Description: The top-level text line and word finding functionality.
4 // Author: Ray Smith
5 // Created: Fri Mar 13 14:43:01 PDT 2009
6 //
7 // (C) Copyright 2009, Google Inc.
8 // Licensed under the Apache License, Version 2.0 (the "License");
9 // you may not use this file except in compliance with the License.
10 // You may obtain a copy of the License at
11 // http://www.apache.org/licenses/LICENSE-2.0
12 // Unless required by applicable law or agreed to in writing, software
13 // distributed under the License is distributed on an "AS IS" BASIS,
14 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 // See the License for the specific language governing permissions and
16 // limitations under the License.
17 //
19 
20 // Include automatically generated configuration file if running autoconf.
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include "baselinedetect.h"
26 #include "drawtord.h"
27 #include "textord.h"
28 #include "makerow.h"
29 #include "pageres.h"
30 #include "tordmain.h"
31 #include "wordseg.h"
32 
33 namespace tesseract {
34 
36  : ccstruct_(ccstruct),
37  use_cjk_fp_model_(false),
38  // makerow.cpp ///////////////////////////////////////////
39  BOOL_MEMBER(textord_single_height_mode, false,
40  "Script has no xheight, so use a single mode",
41  ccstruct_->params()),
42  // tospace.cpp ///////////////////////////////////////////
43  BOOL_MEMBER(tosp_old_to_method, false, "Space stats use prechopping?",
44  ccstruct_->params()),
45  BOOL_MEMBER(tosp_old_to_constrain_sp_kn, false,
46  "Constrain relative values of inter and intra-word gaps for "
47  "old_to_method.",
48  ccstruct_->params()),
49  BOOL_MEMBER(tosp_only_use_prop_rows, true,
50  "Block stats to use fixed pitch rows?", ccstruct_->params()),
51  BOOL_MEMBER(tosp_force_wordbreak_on_punct, false,
52  "Force word breaks on punct to break long lines in non-space "
53  "delimited langs",
54  ccstruct_->params()),
55  BOOL_MEMBER(tosp_use_pre_chopping, false, "Space stats use prechopping?",
56  ccstruct_->params()),
57  BOOL_MEMBER(tosp_old_to_bug_fix, false, "Fix suspected bug in old code",
58  ccstruct_->params()),
59  BOOL_MEMBER(tosp_block_use_cert_spaces, true, "Only stat OBVIOUS spaces",
60  ccstruct_->params()),
61  BOOL_MEMBER(tosp_row_use_cert_spaces, true, "Only stat OBVIOUS spaces",
62  ccstruct_->params()),
63  BOOL_MEMBER(tosp_narrow_blobs_not_cert, true, "Only stat OBVIOUS spaces",
64  ccstruct_->params()),
65  BOOL_MEMBER(tosp_row_use_cert_spaces1, true, "Only stat OBVIOUS spaces",
66  ccstruct_->params()),
67  BOOL_MEMBER(tosp_recovery_isolated_row_stats, true,
68  "Use row alone when inadequate cert spaces",
69  ccstruct_->params()),
70  BOOL_MEMBER(tosp_only_small_gaps_for_kern, false, "Better guess",
71  ccstruct_->params()),
72  BOOL_MEMBER(tosp_all_flips_fuzzy, false, "Pass ANY flip to context?",
73  ccstruct_->params()),
74  BOOL_MEMBER(tosp_fuzzy_limit_all, true,
75  "Don't restrict kn->sp fuzzy limit to tables",
76  ccstruct_->params()),
77  BOOL_MEMBER(tosp_stats_use_xht_gaps, true,
78  "Use within xht gap for wd breaks", ccstruct_->params()),
79  BOOL_MEMBER(tosp_use_xht_gaps, true, "Use within xht gap for wd breaks",
80  ccstruct_->params()),
81  BOOL_MEMBER(tosp_only_use_xht_gaps, false,
82  "Only use within xht gap for wd breaks", ccstruct_->params()),
83  BOOL_MEMBER(tosp_rule_9_test_punct, false,
84  "Don't chng kn to space next to punct", ccstruct_->params()),
85  BOOL_MEMBER(tosp_flip_fuzz_kn_to_sp, true, "Default flip",
86  ccstruct_->params()),
87  BOOL_MEMBER(tosp_flip_fuzz_sp_to_kn, true, "Default flip",
88  ccstruct_->params()),
89  BOOL_MEMBER(tosp_improve_thresh, false, "Enable improvement heuristic",
90  ccstruct_->params()),
91  INT_MEMBER(tosp_debug_level, 0, "Debug data", ccstruct_->params()),
92  INT_MEMBER(tosp_enough_space_samples_for_median, 3,
93  "or should we use mean", ccstruct_->params()),
94  INT_MEMBER(tosp_redo_kern_limit, 10,
95  "No.samples reqd to reestimate for row", ccstruct_->params()),
96  INT_MEMBER(tosp_few_samples, 40,
97  "No.gaps reqd with 1 large gap to treat as a table",
98  ccstruct_->params()),
99  INT_MEMBER(tosp_short_row, 20,
100  "No.gaps reqd with few cert spaces to use certs",
101  ccstruct_->params()),
102  INT_MEMBER(tosp_sanity_method, 1, "How to avoid being silly",
103  ccstruct_->params()),
104  double_MEMBER(tosp_old_sp_kn_th_factor, 2.0,
105  "Factor for defining space threshold in terms of space and "
106  "kern sizes",
107  ccstruct_->params()),
108  double_MEMBER(tosp_threshold_bias1, 0, "how far between kern and space?",
109  ccstruct_->params()),
110  double_MEMBER(tosp_threshold_bias2, 0, "how far between kern and space?",
111  ccstruct_->params()),
112  double_MEMBER(tosp_narrow_fraction, 0.3, "Fract of xheight for narrow",
113  ccstruct_->params()),
114  double_MEMBER(tosp_narrow_aspect_ratio, 0.48,
115  "narrow if w/h less than this", ccstruct_->params()),
116  double_MEMBER(tosp_wide_fraction, 0.52, "Fract of xheight for wide",
117  ccstruct_->params()),
118  double_MEMBER(tosp_wide_aspect_ratio, 0.0, "wide if w/h less than this",
119  ccstruct_->params()),
120  double_MEMBER(tosp_fuzzy_space_factor, 0.6,
121  "Fract of xheight for fuzz sp", ccstruct_->params()),
122  double_MEMBER(tosp_fuzzy_space_factor1, 0.5,
123  "Fract of xheight for fuzz sp", ccstruct_->params()),
124  double_MEMBER(tosp_fuzzy_space_factor2, 0.72,
125  "Fract of xheight for fuzz sp", ccstruct_->params()),
126  double_MEMBER(tosp_gap_factor, 0.83, "gap ratio to flip sp->kern",
127  ccstruct_->params()),
128  double_MEMBER(tosp_kern_gap_factor1, 2.0, "gap ratio to flip kern->sp",
129  ccstruct_->params()),
130  double_MEMBER(tosp_kern_gap_factor2, 1.3, "gap ratio to flip kern->sp",
131  ccstruct_->params()),
132  double_MEMBER(tosp_kern_gap_factor3, 2.5, "gap ratio to flip kern->sp",
133  ccstruct_->params()),
134  double_MEMBER(tosp_ignore_big_gaps, -1, "xht multiplier",
135  ccstruct_->params()),
136  double_MEMBER(tosp_ignore_very_big_gaps, 3.5, "xht multiplier",
137  ccstruct_->params()),
138  double_MEMBER(tosp_rep_space, 1.6, "rep gap multiplier for space",
139  ccstruct_->params()),
140  double_MEMBER(tosp_enough_small_gaps, 0.65,
141  "Fract of kerns reqd for isolated row stats",
142  ccstruct_->params()),
143  double_MEMBER(tosp_table_kn_sp_ratio, 2.25,
144  "Min difference of kn & sp in table", ccstruct_->params()),
145  double_MEMBER(tosp_table_xht_sp_ratio, 0.33,
146  "Expect spaces bigger than this", ccstruct_->params()),
147  double_MEMBER(tosp_table_fuzzy_kn_sp_ratio, 3.0,
148  "Fuzzy if less than this", ccstruct_->params()),
149  double_MEMBER(tosp_fuzzy_kn_fraction, 0.5, "New fuzzy kn alg",
150  ccstruct_->params()),
151  double_MEMBER(tosp_fuzzy_sp_fraction, 0.5, "New fuzzy sp alg",
152  ccstruct_->params()),
153  double_MEMBER(tosp_min_sane_kn_sp, 1.5,
154  "Don't trust spaces less than this time kn",
155  ccstruct_->params()),
156  double_MEMBER(tosp_init_guess_kn_mult, 2.2,
157  "Thresh guess - mult kn by this", ccstruct_->params()),
158  double_MEMBER(tosp_init_guess_xht_mult, 0.28,
159  "Thresh guess - mult xht by this", ccstruct_->params()),
160  double_MEMBER(tosp_max_sane_kn_thresh, 5.0,
161  "Multiplier on kn to limit thresh", ccstruct_->params()),
162  double_MEMBER(tosp_flip_caution, 0.0,
163  "Don't autoflip kn to sp when large separation",
164  ccstruct_->params()),
165  double_MEMBER(tosp_large_kerning, 0.19,
166  "Limit use of xht gap with large kns", ccstruct_->params()),
167  double_MEMBER(tosp_dont_fool_with_small_kerns, -1,
168  "Limit use of xht gap with odd small kns",
169  ccstruct_->params()),
170  double_MEMBER(tosp_near_lh_edge, 0,
171  "Don't reduce box if the top left is non blank",
172  ccstruct_->params()),
173  double_MEMBER(tosp_silly_kn_sp_gap, 0.2,
174  "Don't let sp minus kn get too small", ccstruct_->params()),
175  double_MEMBER(tosp_pass_wide_fuzz_sp_to_context, 0.75,
176  "How wide fuzzies need context", ccstruct_->params()),
177  // tordmain.cpp ///////////////////////////////////////////
178  BOOL_MEMBER(textord_no_rejects, false, "Don't remove noise blobs",
179  ccstruct_->params()),
180  BOOL_MEMBER(textord_show_blobs, false, "Display unsorted blobs",
181  ccstruct_->params()),
182  BOOL_MEMBER(textord_show_boxes, false, "Display unsorted blobs",
183  ccstruct_->params()),
184  INT_MEMBER(textord_max_noise_size, 7, "Pixel size of noise",
185  ccstruct_->params()),
186  INT_MEMBER(textord_baseline_debug, 0, "Baseline debug level",
187  ccstruct_->params()),
188  double_MEMBER(textord_blob_size_bigile, 95, "Percentile for large blobs",
189  ccstruct_->params()),
190  double_MEMBER(textord_noise_area_ratio, 0.7,
191  "Fraction of bounding box for noise", ccstruct_->params()),
192  double_MEMBER(textord_blob_size_smallile, 20,
193  "Percentile for small blobs", ccstruct_->params()),
194  double_MEMBER(textord_initialx_ile, 0.75,
195  "Ile of sizes for xheight guess", ccstruct_->params()),
196  double_MEMBER(textord_initialasc_ile, 0.90,
197  "Ile of sizes for xheight guess", ccstruct_->params()),
198  INT_MEMBER(textord_noise_sizefraction, 10, "Fraction of size for maxima",
199  ccstruct_->params()),
200  double_MEMBER(textord_noise_sizelimit, 0.5,
201  "Fraction of x for big t count", ccstruct_->params()),
202  INT_MEMBER(textord_noise_translimit, 16, "Transitions for normal blob",
203  ccstruct_->params()),
204  double_MEMBER(textord_noise_normratio, 2.0,
205  "Dot to norm ratio for deletion", ccstruct_->params()),
206  BOOL_MEMBER(textord_noise_rejwords, true, "Reject noise-like words",
207  ccstruct_->params()),
208  BOOL_MEMBER(textord_noise_rejrows, true, "Reject noise-like rows",
209  ccstruct_->params()),
210  double_MEMBER(textord_noise_syfract, 0.2,
211  "xh fract height error for norm blobs",
212  ccstruct_->params()),
213  double_MEMBER(textord_noise_sxfract, 0.4,
214  "xh fract width error for norm blobs", ccstruct_->params()),
215  double_MEMBER(textord_noise_hfract, 1.0 / 64,
216  "Height fraction to discard outlines as speckle noise",
217  ccstruct_->params()),
218  INT_MEMBER(textord_noise_sncount, 1, "super norm blobs to save row",
219  ccstruct_->params()),
220  double_MEMBER(textord_noise_rowratio, 6.0,
221  "Dot to norm ratio for deletion", ccstruct_->params()),
222  BOOL_MEMBER(textord_noise_debug, false, "Debug row garbage detector",
223  ccstruct_->params()),
224  double_MEMBER(textord_blshift_maxshift, 0.00, "Max baseline shift",
225  ccstruct_->params()),
226  double_MEMBER(textord_blshift_xfraction, 9.99,
227  "Min size of baseline shift", ccstruct_->params()) {}
228 
230 }
231 
232 // Make the textlines and words inside each block.
233 void Textord::TextordPage(PageSegMode pageseg_mode, const FCOORD& reskew,
234  int width, int height, Pix* binary_pix,
235  Pix* thresholds_pix, Pix* grey_pix,
236  bool use_box_bottoms, BLOBNBOX_LIST* diacritic_blobs,
237  BLOCK_LIST* blocks, TO_BLOCK_LIST* to_blocks) {
238  page_tr_.set_x(width);
239  page_tr_.set_y(height);
240  if (to_blocks->empty()) {
241  // AutoPageSeg was not used, so we need to find_components first.
242  find_components(binary_pix, blocks, to_blocks);
243  TO_BLOCK_IT it(to_blocks);
244  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
245  TO_BLOCK* to_block = it.data();
246  // Compute the edge offsets whether or not there is a grey_pix.
247  // We have by-passed auto page seg, so we have to run it here.
248  // By page segmentation mode there is no non-text to avoid running on.
249  to_block->ComputeEdgeOffsets(thresholds_pix, grey_pix);
250  }
251  } else if (!PSM_SPARSE(pageseg_mode)) {
252  // AutoPageSeg does not need to find_components as it did that already.
253  // Filter_blobs sets up the TO_BLOCKs the same as find_components does.
254  filter_blobs(page_tr_, to_blocks, true);
255  }
256 
257  ASSERT_HOST(!to_blocks->empty());
258  if (pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT) {
259  const FCOORD anticlockwise90(0.0f, 1.0f);
260  const FCOORD clockwise90(0.0f, -1.0f);
261  TO_BLOCK_IT it(to_blocks);
262  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
263  TO_BLOCK* to_block = it.data();
264  BLOCK* block = to_block->block;
265  // Create a fake poly_block in block from its bounding box.
266  block->set_poly_block(new POLY_BLOCK(block->bounding_box(),
268  // Rotate the to_block along with its contained block and blobnbox lists.
269  to_block->rotate(anticlockwise90);
270  // Set the block's rotation values to obey the convention followed in
271  // layout analysis for vertical text.
272  block->set_re_rotation(clockwise90);
273  block->set_classify_rotation(clockwise90);
274  }
275  }
276 
277  TO_BLOCK_IT to_block_it(to_blocks);
278  TO_BLOCK* to_block = to_block_it.data();
279  // Make the rows in the block.
280  float gradient;
281  // Do it the old fashioned way.
282  if (PSM_LINE_FIND_ENABLED(pageseg_mode)) {
283  gradient = make_rows(page_tr_, to_blocks);
284  } else if (!PSM_SPARSE(pageseg_mode)) {
285  // RAW_LINE, SINGLE_LINE, SINGLE_WORD and SINGLE_CHAR all need a single row.
286  gradient = make_single_row(page_tr_, pageseg_mode != PSM_RAW_LINE,
287  to_block, to_blocks);
288  } else {
289  gradient = 0.0f;
290  }
291  BaselineDetect baseline_detector(textord_baseline_debug,
292  reskew, to_blocks);
293  baseline_detector.ComputeStraightBaselines(use_box_bottoms);
294  baseline_detector.ComputeBaselineSplinesAndXheights(
295  page_tr_, pageseg_mode != PSM_RAW_LINE, textord_heavy_nr,
297  // Now make the words in the lines.
298  if (PSM_WORD_FIND_ENABLED(pageseg_mode)) {
299  // SINGLE_LINE uses the old word maker on the single line.
300  make_words(this, page_tr_, gradient, blocks, to_blocks);
301  } else {
302  // SINGLE_WORD and SINGLE_CHAR cram all the blobs into a
303  // single word, and in SINGLE_CHAR mode, all the outlines
304  // go in a single blob.
305  TO_BLOCK* to_block = to_block_it.data();
306  make_single_word(pageseg_mode == PSM_SINGLE_CHAR,
307  to_block->get_rows(), to_block->block->row_list());
308  }
309  // Remove empties.
310  cleanup_blocks(PSM_WORD_FIND_ENABLED(pageseg_mode), blocks);
311  TransferDiacriticsToBlockGroups(diacritic_blobs, blocks);
312  // Compute the margins for each row in the block, to be used later for
313  // paragraph detection.
314  BLOCK_IT b_it(blocks);
315  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
316  b_it.data()->compute_row_margins();
317  }
318 #ifndef GRAPHICS_DISABLED
319  close_to_win();
320 #endif
321 }
322 
323 // If we were supposed to return only a single textline, and there is more
324 // than one, clean up and leave only the best.
326  PAGE_RES* page_res) {
327  if (PSM_LINE_FIND_ENABLED(pageseg_mode) || PSM_SPARSE(pageseg_mode))
328  return; // No cleanup required.
329  PAGE_RES_IT it(page_res);
330  // Find the best row, being the greatest mean word conf.
331  float row_total_conf = 0.0f;
332  int row_word_count = 0;
333  ROW_RES* best_row = NULL;
334  float best_conf = 0.0f;
335  for (it.restart_page(); it.word() != NULL; it.forward()) {
336  WERD_RES* word = it.word();
337  row_total_conf += word->best_choice->certainty();
338  ++row_word_count;
339  if (it.next_row() != it.row()) {
340  row_total_conf /= row_word_count;
341  if (best_row == NULL || best_conf < row_total_conf) {
342  best_row = it.row();
343  best_conf = row_total_conf;
344  }
345  row_total_conf = 0.0f;
346  row_word_count = 0;
347  }
348  }
349  // Now eliminate any word not in the best row.
350  for (it.restart_page(); it.word() != NULL; it.forward()) {
351  if (it.row() != best_row)
352  it.DeleteCurrentWord();
353  }
354 }
355 
356 } // namespace tesseract.
void ComputeStraightBaselines(bool use_box_bottoms)
WERD_RES * word() const
Definition: pageres.h:736
void close_to_win()
Definition: drawtord.cpp:56
BLOCK * block
Definition: blobbox.h:773
void make_words(tesseract::Textord *textord, ICOORD page_tr, float gradient, BLOCK_LIST *blocks, TO_BLOCK_LIST *port_blocks)
Definition: wordseg.cpp:104
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
float certainty() const
Definition: ratngs.h:328
bool textord_heavy_nr
Definition: makerow.cpp:44
bool PSM_SPARSE(int pageseg_mode)
Definition: publictypes.h:188
void find_components(Pix *pix, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: tordmain.cpp:205
ROW_RES * row() const
Definition: pageres.h:739
Treat the image as a single character.
Definition: publictypes.h:164
WERD_RES * restart_page()
Definition: pageres.h:683
WERD_CHOICE * best_choice
Definition: pageres.h:219
bool PSM_LINE_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:194
bool PSM_WORD_FIND_ENABLED(int pageseg_mode)
Definition: publictypes.h:197
void TextordPage(PageSegMode pageseg_mode, const FCOORD &reskew, int width, int height, Pix *binary_pix, Pix *thresholds_pix, Pix *grey_pix, bool use_box_bottoms, BLOBNBOX_LIST *diacritic_blobs, BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks)
Definition: textord.cpp:233
void CleanupSingleRowResult(PageSegMode pageseg_mode, PAGE_RES *page_res)
Definition: textord.cpp:325
ROW_RES * next_row() const
Definition: pageres.h:748
void filter_blobs(ICOORD page_tr, TO_BLOCK_LIST *blocks, BOOL8 testing_on)
Definition: tordmain.cpp:236
void set_poly_block(POLY_BLOCK *blk)
set the poly block
Definition: pdblock.h:57
#define BOOL_MEMBER(name, val, comment, vec)
Definition: params.h:304
#define INT_MEMBER(name, val, comment, vec)
Definition: params.h:301
void set_re_rotation(const FCOORD &rotation)
Definition: ocrblock.h:141
WERD_RES * forward()
Definition: pageres.h:716
#define double_MEMBER(name, val, comment, vec)
Definition: params.h:310
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
bool textord_show_final_rows
Definition: makerow.cpp:48
float make_rows(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: makerow.cpp:201
void rotate(const FCOORD &rotation)
Definition: blobbox.h:706
int textord_baseline_debug
Definition: textord.h:377
Definition: points.h:189
Definition: ocrblock.h:30
Textord(CCStruct *ccstruct)
Definition: textord.cpp:35
void set_y(inT16 yin)
rewrite function
Definition: points.h:65
void set_classify_rotation(const FCOORD &rotation)
Definition: ocrblock.h:147
void ComputeBaselineSplinesAndXheights(const ICOORD &page_tr, bool enable_splines, bool remove_noise, bool show_final_rows, Textord *textord)
void ComputeEdgeOffsets(Pix *thresholds, Pix *grey)
Definition: blobbox.cpp:1051
void set_x(inT16 xin)
rewrite function
Definition: points.h:61
float make_single_row(ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
Definition: makerow.cpp:164
void DeleteCurrentWord()
Definition: pageres.cpp:1450
void make_single_word(bool one_blob, TO_ROW_LIST *rows, ROW_LIST *real_rows)
Definition: wordseg.cpp:60
#define ASSERT_HOST(x)
Definition: errcode.h:84
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59