tesseract  3.05.02
underlin.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: underlin.cpp (Formerly undrline.c)
3  * Description: Code to chop blobs apart from underlines.
4  * Author: Ray Smith
5  * Created: Mon Aug 8 11:14:00 BST 1994
6  *
7  * (C) Copyright 1994, Hewlett-Packard Ltd.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #ifdef __UNIX__
21 #include <assert.h>
22 #endif
23 #include "underlin.h"
24 
25 #define PROJECTION_MARGIN 10 //arbitrary
26 #define EXTERN
27 
28 EXTERN double_VAR (textord_underline_offset, 0.1, "Fraction of x to ignore");
30 "Chop underlines & put back");
31 
32 /**********************************************************************
33  * restore_underlined_blobs
34  *
35  * Find underlined blobs and put them back in the row.
36  **********************************************************************/
37 
38 void restore_underlined_blobs( //get chop points
39  TO_BLOCK *block //block to do
40  ) {
41  inT16 chop_coord; //chop boundary
42  TBOX blob_box; //of underline
43  BLOBNBOX *u_line; //underline bit
44  TO_ROW *row; //best row for blob
45  ICOORDELT_LIST chop_cells; //blobs to cut out
46  //real underlines
47  BLOBNBOX_LIST residual_underlines;
48  C_OUTLINE_LIST left_coutlines;
49  C_OUTLINE_LIST right_coutlines;
50  ICOORDELT_IT cell_it = &chop_cells;
51  //under lines
52  BLOBNBOX_IT under_it = &block->underlines;
53  BLOBNBOX_IT ru_it = &residual_underlines;
54 
55  if (block->get_rows()->empty())
56  return; // Don't crash if there are no rows.
57  for (under_it.mark_cycle_pt (); !under_it.cycled_list ();
58  under_it.forward ()) {
59  u_line = under_it.extract ();
60  blob_box = u_line->bounding_box ();
61  row = most_overlapping_row (block->get_rows (), u_line);
62  if (row == NULL)
63  return; // Don't crash if there is no row.
64  find_underlined_blobs (u_line, &row->baseline, row->xheight,
66  &chop_cells);
67  cell_it.set_to_list (&chop_cells);
68  for (cell_it.mark_cycle_pt (); !cell_it.cycled_list ();
69  cell_it.forward ()) {
70  chop_coord = cell_it.data ()->x ();
71  if (cell_it.data ()->y () - chop_coord > textord_fp_chop_error + 1) {
72  split_to_blob (u_line, chop_coord,
74  &left_coutlines,
75  &right_coutlines);
76  if (!left_coutlines.empty()) {
77  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
78  }
79  chop_coord = cell_it.data ()->y ();
80  split_to_blob(NULL, chop_coord, textord_fp_chop_error + 0.5,
81  &left_coutlines, &right_coutlines);
82  if (!left_coutlines.empty()) {
83  row->insert_blob(new BLOBNBOX(new C_BLOB(&left_coutlines)));
84  }
85  u_line = NULL; //no more blobs to add
86  }
87  delete cell_it.extract();
88  }
89  if (!right_coutlines.empty ()) {
90  split_to_blob(NULL, blob_box.right(), textord_fp_chop_error + 0.5,
91  &left_coutlines, &right_coutlines);
92  if (!left_coutlines.empty())
93  ru_it.add_after_then_move(new BLOBNBOX(new C_BLOB(&left_coutlines)));
94  }
95  if (u_line != NULL) {
96  if (u_line->cblob() != NULL)
97  delete u_line->cblob();
98  delete u_line;
99  }
100  }
101  if (!ru_it.empty()) {
102  ru_it.move_to_first();
103  for (ru_it.mark_cycle_pt(); !ru_it.cycled_list(); ru_it.forward()) {
104  under_it.add_after_then_move(ru_it.extract());
105  }
106  }
107 }
108 
109 
110 /**********************************************************************
111  * most_overlapping_row
112  *
113  * Return the row which most overlaps the blob.
114  **********************************************************************/
115 
116 TO_ROW *most_overlapping_row( //find best row
117  TO_ROW_LIST *rows, //list of rows
118  BLOBNBOX *blob //blob to place
119  ) {
120  inT16 x = (blob->bounding_box ().left ()
121  + blob->bounding_box ().right ()) / 2;
122  TO_ROW_IT row_it = rows; //row iterator
123  TO_ROW *row; //current row
124  TO_ROW *best_row; //output row
125  float overlap; //of blob & row
126  float bestover; //best overlap
127 
128  best_row = NULL;
129  bestover = (float) -MAX_INT32;
130  if (row_it.empty ())
131  return NULL;
132  row = row_it.data ();
133  row_it.mark_cycle_pt ();
134  while (row->baseline.y (x) + row->descdrop > blob->bounding_box ().top ()
135  && !row_it.cycled_list ()) {
136  best_row = row;
137  bestover =
138  blob->bounding_box ().top () - row->baseline.y (x) + row->descdrop;
139  row_it.forward ();
140  row = row_it.data ();
141  }
142  while (row->baseline.y (x) + row->xheight + row->ascrise
143  >= blob->bounding_box ().bottom () && !row_it.cycled_list ()) {
144  overlap = row->baseline.y (x) + row->xheight + row->ascrise;
145  if (blob->bounding_box ().top () < overlap)
146  overlap = blob->bounding_box ().top ();
147  if (blob->bounding_box ().bottom () >
148  row->baseline.y (x) + row->descdrop)
149  overlap -= blob->bounding_box ().bottom ();
150  else
151  overlap -= row->baseline.y (x) + row->descdrop;
152  if (overlap > bestover) {
153  bestover = overlap;
154  best_row = row;
155  }
156  row_it.forward ();
157  row = row_it.data ();
158  }
159  if (bestover < 0
160  && row->baseline.y (x) + row->xheight + row->ascrise
161  - blob->bounding_box ().bottom () > bestover)
162  best_row = row;
163  return best_row;
164 }
165 
166 
167 /**********************************************************************
168  * find_underlined_blobs
169  *
170  * Find the start and end coords of blobs in the underline.
171  **********************************************************************/
172 
173 void find_underlined_blobs( //get chop points
174  BLOBNBOX *u_line, //underlined unit
175  QSPLINE *baseline, //actual baseline
176  float xheight, //height of line
177  float baseline_offset, //amount to shrinke it
178  ICOORDELT_LIST *chop_cells //places to chop
179  ) {
180  inT16 x, y; //sides of blob
181  ICOORD blob_chop; //sides of blob
182  TBOX blob_box = u_line->bounding_box ();
183  //cell iterator
184  ICOORDELT_IT cell_it = chop_cells;
185  STATS upper_proj (blob_box.left (), blob_box.right () + 1);
186  STATS middle_proj (blob_box.left (), blob_box.right () + 1);
187  STATS lower_proj (blob_box.left (), blob_box.right () + 1);
188  C_OUTLINE_IT out_it; //outlines of blob
189 
190  ASSERT_HOST (u_line->cblob () != NULL);
191 
192  out_it.set_to_list (u_line->cblob ()->out_list ());
193  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
194  vertical_cunderline_projection (out_it.data (),
195  baseline, xheight, baseline_offset,
196  &lower_proj, &middle_proj, &upper_proj);
197  }
198 
199  for (x = blob_box.left (); x < blob_box.right (); x++) {
200  if (middle_proj.pile_count (x) > 0) {
201  for (y = x + 1;
202  y < blob_box.right () && middle_proj.pile_count (y) > 0; y++);
203  blob_chop = ICOORD (x, y);
204  cell_it.add_after_then_move (new ICOORDELT (blob_chop));
205  x = y;
206  }
207  }
208 }
209 
210 
211 /**********************************************************************
212  * vertical_cunderline_projection
213  *
214  * Compute the vertical projection of a outline from its outlines
215  * and add to the given STATS.
216  **********************************************************************/
217 
218 void vertical_cunderline_projection( //project outlines
219  C_OUTLINE *outline, //outline to project
220  QSPLINE *baseline, //actual baseline
221  float xheight, //height of line
222  float baseline_offset, //amount to shrinke it
223  STATS *lower_proj, //below baseline
224  STATS *middle_proj, //centre region
225  STATS *upper_proj //top region
226  ) {
227  ICOORD pos; //current point
228  ICOORD step; //edge step
229  inT16 lower_y, upper_y; //region limits
230  inT32 length; //of outline
231  inT16 stepindex; //current step
232  C_OUTLINE_IT out_it = outline->child ();
233 
234  pos = outline->start_pos ();
235  length = outline->pathlength ();
236  for (stepindex = 0; stepindex < length; stepindex++) {
237  step = outline->step (stepindex);
238  if (step.x () > 0) {
239  lower_y =
240  (inT16) floor (baseline->y (pos.x ()) + baseline_offset + 0.5);
241  upper_y =
242  (inT16) floor (baseline->y (pos.x ()) + baseline_offset +
243  xheight + 0.5);
244  if (pos.y () >= lower_y) {
245  lower_proj->add (pos.x (), -lower_y);
246  if (pos.y () >= upper_y) {
247  middle_proj->add (pos.x (), lower_y - upper_y);
248  upper_proj->add (pos.x (), upper_y - pos.y ());
249  }
250  else
251  middle_proj->add (pos.x (), lower_y - pos.y ());
252  }
253  else
254  lower_proj->add (pos.x (), -pos.y ());
255  }
256  else if (step.x () < 0) {
257  lower_y =
258  (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
259  0.5);
260  upper_y =
261  (inT16) floor (baseline->y (pos.x () - 1) + baseline_offset +
262  xheight + 0.5);
263  if (pos.y () >= lower_y) {
264  lower_proj->add (pos.x () - 1, lower_y);
265  if (pos.y () >= upper_y) {
266  middle_proj->add (pos.x () - 1, upper_y - lower_y);
267  upper_proj->add (pos.x () - 1, pos.y () - upper_y);
268  }
269  else
270  middle_proj->add (pos.x () - 1, pos.y () - lower_y);
271  }
272  else
273  lower_proj->add (pos.x () - 1, pos.y ());
274  }
275  pos += step;
276  }
277 
278  for (out_it.mark_cycle_pt (); !out_it.cycled_list (); out_it.forward ()) {
279  vertical_cunderline_projection (out_it.data (),
280  baseline, xheight, baseline_offset,
281  lower_proj, middle_proj, upper_proj);
282  }
283 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
#define TRUE
Definition: capi.h:45
short inT16
Definition: host.h:33
EXTERN int textord_fp_chop_error
Definition: fpchop.cpp:39
integer coordinate
Definition: points.h:30
C_BLOB * cblob() const
Definition: blobbox.h:253
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
inT32 pathlength() const
Definition: coutln.h:133
ICOORD step(int index) const
Definition: coutln.h:142
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
void vertical_cunderline_projection(C_OUTLINE *outline, QSPLINE *baseline, float xheight, float baseline_offset, STATS *lower_proj, STATS *middle_proj, STATS *upper_proj)
Definition: underlin.cpp:218
double y(double x) const
Definition: quspline.cpp:217
#define EXTERN
Definition: underlin.cpp:26
inT16 bottom() const
Definition: rect.h:61
BLOBNBOX_LIST underlines
Definition: blobbox.h:769
float ascrise
Definition: blobbox.h:655
inT16 x() const
access function
Definition: points.h:52
void insert_blob(BLOBNBOX *blob)
Definition: blobbox.cpp:764
C_OUTLINE_LIST * child()
Definition: coutln.h:106
inT16 left() const
Definition: rect.h:68
EXTERN double textord_underline_offset
Definition: underlin.cpp:28
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
TO_ROW * most_overlapping_row(TO_ROW_LIST *rows, BLOBNBOX *blob)
Definition: underlin.cpp:116
int inT32
Definition: host.h:35
QSPLINE baseline
Definition: blobbox.h:666
float xheight
Definition: blobbox.h:653
#define MAX_INT32
Definition: host.h:53
EXTERN bool textord_restore_underlines
Definition: underlin.cpp:30
inT16 top() const
Definition: rect.h:54
void split_to_blob(BLOBNBOX *blob, inT16 chop_coord, float pitch_error, C_OUTLINE_LIST *left_coutlines, C_OUTLINE_LIST *right_coutlines)
Definition: fpchop.cpp:243
const ICOORD & start_pos() const
Definition: coutln.h:146
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void restore_underlined_blobs(TO_BLOCK *block)
Definition: underlin.cpp:38
#define double_VAR(name, val, comment)
Definition: params.h:286
#define BOOL_VAR(name, val, comment)
Definition: params.h:280
Definition: statistc.h:33
#define ASSERT_HOST(x)
Definition: errcode.h:84
float descdrop
Definition: blobbox.h:656
inT16 y() const
access_function
Definition: points.h:56
void find_underlined_blobs(BLOBNBOX *u_line, QSPLINE *baseline, float xheight, float baseline_offset, ICOORDELT_LIST *chop_cells)
Definition: underlin.cpp:173