tesseract  3.05.02
associate.cpp
Go to the documentation of this file.
1 // File: associate.cpp
3 // Description: Functions for scoring segmentation paths according to
4 // their character widths, gap widths and seam cuts.
5 // Author: Daria Antonova
6 // Created: Mon Mar 8 11:26:43 PDT 2010
7 //
8 // (C) Copyright 2010, Google Inc.
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 
21 
22 #include <stdio.h>
23 #ifdef __UNIX__
24 #include <assert.h>
25 #endif
26 #include <math.h>
27 
28 #include "associate.h"
29 #include "normalis.h"
30 #include "pageres.h"
31 
32 namespace tesseract {
33 
// Smallest acceptable gap next to a blob in fixed pitch mode, expressed as a
// fraction of the normalizing height. ComputeStats() divides the left/right
// blob gaps by the normalizing height and marks the shape as bad when the
// result is below this value.
35 const float AssociateUtils::kMinGap = 0.03f;
36 
37 void AssociateUtils::ComputeStats(int col, int row,
38  const AssociateStats *parent_stats,
39  int parent_path_length,
40  bool fixed_pitch,
41  float max_char_wh_ratio,
42  WERD_RES *word_res,
43  bool debug,
44  AssociateStats *stats) {
45  stats->Clear();
46 
47  ASSERT_HOST(word_res != NULL);
48  if (word_res->blob_widths.empty()) {
49  return;
50  }
51  if (debug) {
52  tprintf("AssociateUtils::ComputeStats() for col=%d, row=%d%s\n",
53  col, row, fixed_pitch ? " (fixed pitch)" : "");
54  }
55  float normalizing_height = kBlnXHeight;
56  ROW* blob_row = word_res->blob_row;
57  // TODO(rays/daria) Can unicharset.script_has_xheight be useful here?
58  if (fixed_pitch && blob_row != NULL) {
59  // For fixed pitch language like CJK, we use the full text height
60  // as the normalizing factor so we are not dependent on xheight
61  // calculation.
62  if (blob_row->body_size() > 0.0f) {
63  normalizing_height = word_res->denorm.y_scale() * blob_row->body_size();
64  } else {
65  normalizing_height = word_res->denorm.y_scale() *
66  (blob_row->x_height() + blob_row->ascenders());
67  }
68  if (debug) {
69  tprintf("normalizing height = %g (scale %g xheight %g ascenders %g)\n",
70  normalizing_height, word_res->denorm.y_scale(),
71  blob_row->x_height(), blob_row->ascenders());
72  }
73  }
74  float wh_ratio = word_res->GetBlobsWidth(col, row) / normalizing_height;
75  if (wh_ratio > max_char_wh_ratio) stats->bad_shape = true;
76  // Compute the gap sum for this shape. If there are only negative or only
77  // positive gaps, record their sum in stats->gap_sum. However, if there is
78  // a mixture, record only the sum of the positive gaps.
79  // TODO(antonova): explain fragment.
80  int negative_gap_sum = 0;
81  for (int c = col; c < row; ++c) {
82  int gap = word_res->GetBlobsGap(c);
83  (gap > 0) ? stats->gap_sum += gap : negative_gap_sum += gap;
84  }
85  if (stats->gap_sum == 0) stats->gap_sum = negative_gap_sum;
86  if (debug) {
87  tprintf("wh_ratio=%g (max_char_wh_ratio=%g) gap_sum=%d %s\n",
88  wh_ratio, max_char_wh_ratio, stats->gap_sum,
89  stats->bad_shape ? "bad_shape" : "");
90  }
91  // Compute shape_cost (for fixed pitch mode).
92  if (fixed_pitch) {
93  bool end_row = (row == (word_res->ratings->dimension() - 1));
94 
95  // Ensure that the blob has gaps on the left and the right sides
96  // (except for beginning and ending punctuation) and that there is
97  // no cutting through ink at the blob boundaries.
98  if (col > 0) {
99  float left_gap = word_res->GetBlobsGap(col - 1) / normalizing_height;
100  SEAM *left_seam = word_res->seam_array[col - 1];
101  if ((!end_row && left_gap < kMinGap) || left_seam->priority() > 0.0f) {
102  stats->bad_shape = true;
103  }
104  if (debug) {
105  tprintf("left_gap %g, left_seam %g %s\n", left_gap,
106  left_seam->priority(), stats->bad_shape ? "bad_shape" : "");
107  }
108  }
109  float right_gap = 0.0f;
110  if (!end_row) {
111  right_gap = word_res->GetBlobsGap(row) / normalizing_height;
112  SEAM *right_seam = word_res->seam_array[row];
113  if (right_gap < kMinGap || right_seam->priority() > 0.0f) {
114  stats->bad_shape = true;
115  if (right_gap < kMinGap) stats->bad_fixed_pitch_right_gap = true;
116  }
117  if (debug) {
118  tprintf("right_gap %g right_seam %g %s\n", right_gap,
119  right_seam->priority(), stats->bad_shape ? "bad_shape" : "");
120  }
121  }
122 
123  // Impose additional segmentation penalties if blob widths or gaps
124  // distribution don't fit a fixed-pitch model.
125  // Since we only know the widths and gaps of the path explored so far,
126  // the means and variances are computed for the path so far (not
127  // considering characters to the right of the last character on the path).
128  stats->full_wh_ratio = wh_ratio + right_gap;
129  if (parent_stats != NULL) {
130  stats->full_wh_ratio_total =
131  (parent_stats->full_wh_ratio_total + stats->full_wh_ratio);
132  float mean =
133  stats->full_wh_ratio_total / static_cast<float>(parent_path_length+1);
134  stats->full_wh_ratio_var =
135  parent_stats->full_wh_ratio_var + pow(mean-stats->full_wh_ratio, 2);
136  } else {
137  stats->full_wh_ratio_total = stats->full_wh_ratio;
138  }
139  if (debug) {
140  tprintf("full_wh_ratio %g full_wh_ratio_total %g full_wh_ratio_var %g\n",
141  stats->full_wh_ratio, stats->full_wh_ratio_total,
142  stats->full_wh_ratio_var);
143  }
144 
145  stats->shape_cost =
146  FixedPitchWidthCost(wh_ratio, right_gap, end_row, max_char_wh_ratio);
147 
148  // For some reason Tesseract prefers to treat the whole CJ words
149  // as one blob when the initial segmentation is particularly bad.
150  // This hack is to avoid favoring such states.
151  if (col == 0 && end_row && wh_ratio > max_char_wh_ratio) {
152  stats->shape_cost += 10;
153  }
154  stats->shape_cost += stats->full_wh_ratio_var;
155  if (debug) tprintf("shape_cost %g\n", stats->shape_cost);
156  }
157 }
158 
159 float AssociateUtils::FixedPitchWidthCost(float norm_width,
160  float right_gap,
161  bool end_pos,
162  float max_char_wh_ratio) {
163  float cost = 0.0f;
164  if (norm_width > max_char_wh_ratio) cost += norm_width;
165  if (norm_width > kMaxFixedPitchCharAspectRatio)
166  cost += norm_width * norm_width; // extra penalty for merging CJK chars
167  // Penalize skinny blobs, except for punctuation in the last position.
168  if (norm_width+right_gap < 0.5f && !end_pos) {
169  cost += 1.0f - (norm_width + right_gap);
170  }
171  return cost;
172 }
173 
174 } // namespace tesseract
float priority() const
Definition: seam.h:65
int dimension() const
Definition: matrix.h:530
float y_scale() const
Definition: normalis.h:272
int GetBlobsWidth(int start_blob, int last_blob)
Definition: pageres.cpp:722
float body_size() const
Definition: ocrrow.h:70
GenericVector< SEAM * > seam_array
Definition: pageres.h:203
static const float kMinGap
Definition: associate.h:70
MATRIX * ratings
Definition: pageres.h:215
ROW * blob_row
Definition: pageres.h:186
const int kBlnXHeight
Definition: normalis.h:28
static float FixedPitchWidthCost(float norm_width, float right_gap, bool end_pos, float max_char_wh_ratio)
Definition: associate.cpp:159
int GetBlobsGap(int blob_index)
Definition: pageres.cpp:732
#define tprintf(...)
Definition: tprintf.h:31
Definition: ocrrow.h:32
Definition: seam.h:44
GenericVector< int > blob_widths
Definition: pageres.h:205
float x_height() const
Definition: ocrrow.h:61
static const float kMaxFixedPitchCharAspectRatio
Definition: associate.h:69
bool empty() const
Definition: genericvector.h:84
float ascenders() const
Definition: ocrrow.h:79
DENORM denorm
Definition: pageres.h:190
#define ASSERT_HOST(x)
Definition: errcode.h:84
static void ComputeStats(int col, int row, const AssociateStats *parent_stats, int parent_path_length, bool fixed_pitch, float max_char_wh_ratio, WERD_RES *word_res, bool debug, AssociateStats *stats)
Definition: associate.cpp:37