tesseract  3.05.02
cjkpitch.cpp
Go to the documentation of this file.
1 // File: cjkpitch.cpp
3 // Description: Code to determine fixed pitchness and the pitch if fixed,
4 // for CJK text.
5 // Copyright 2011 Google Inc. All Rights Reserved.
6 // Author: takenaka@google.com (Hiroshi Takenaka)
7 // Created: Mon Jun 27 12:48:35 JST 2011
8 //
9 // Licensed under the Apache License, Version 2.0 (the "License");
10 // you may not use this file except in compliance with the License.
11 // You may obtain a copy of the License at
12 // http://www.apache.org/licenses/LICENSE-2.0
13 // Unless required by applicable law or agreed to in writing, software
14 // distributed under the License is distributed on an "AS IS" BASIS,
15 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 // See the License for the specific language governing permissions and
17 // limitations under the License.
18 //
20 #include "cjkpitch.h"
21 #include "genericvector.h"
22 #include "ndminx.h"
23 #include "topitch.h"
24 #include "tovars.h"
25 
27  "If true, word delimiter spaces are assumed to have "
28  "variable width, even though characters have fixed pitch.");
29 
30 namespace {
31 
32 // Allow +/-10% error for character pitch / body size.
33 static const float kFPTolerance = 0.1;
34 
35 // Minimum ratio of "good" character pitch for a row to be considered
36 // to be fixed-pitch.
37 static const float kFixedPitchThreshold = 0.35;
38 
39 // rank statistics for a small collection of float values.
40 class SimpleStats {
41  public:
42  SimpleStats(): finalized_(false), values_() { }
43  ~SimpleStats() { }
44 
45  void Clear() {
46  values_.clear();
47  finalized_ = false;
48  }
49 
50  void Add(float value) {
51  values_.push_back(value);
52  finalized_ = false;
53  }
54 
55  void Finish() {
56  values_.sort(float_compare);
57  finalized_ = true;
58  }
59 
60  float ile(double frac) {
61  if (!finalized_) Finish();
62  if (values_.empty()) return 0.0;
63  if (frac >= 1.0) return values_.back();
64  if (frac <= 0.0 || values_.size() == 1) return values_[0];
65  int index = static_cast<int>((values_.size() - 1) * frac);
66  float reminder = (values_.size() - 1) * frac - index;
67 
68  return values_[index] * (1.0 - reminder) +
69  values_[index + 1] * reminder;
70  }
71 
72  float median() {
73  return ile(0.5);
74  }
75 
76  float maximum() {
77  if (!finalized_) Finish();
78  if (values_.empty()) return 0.0;
79  return values_.back();
80  }
81 
82  float minimum() {
83  if (!finalized_) Finish();
84  if (values_.empty()) return 0.0;
85  return values_[0];
86  }
87 
88  int size() const {
89  return values_.size();
90  }
91 
92  private:
93  static int float_compare(const void* a, const void* b) {
94  const float* f_a = reinterpret_cast<const float*>(a);
95  const float* f_b = reinterpret_cast<const float*>(b);
96  return (*f_a > *f_b) ? 1 : ((*f_a < *f_b) ? -1 : 0);
97  }
98 
99  bool finalized_;
100  GenericVector<float> values_;
101 };
102 
103 // statistics for a small collection of float pairs (x, y).
104 // EstimateYFor(x, r) returns the estimated y at x, based on
105 // existing samples between x*(1-r) ~ x*(1+r).
106 class LocalCorrelation {
107  public:
108  struct float_pair {
109  float x, y;
110  int vote;
111  };
112 
113  LocalCorrelation(): finalized_(false) { }
114  ~LocalCorrelation() { }
115 
116  void Finish() {
117  values_.sort(float_pair_compare);
118  finalized_ = true;
119  }
120 
121  void Clear() {
122  finalized_ = false;
123  }
124 
125  void Add(float x, float y, int v) {
126  struct float_pair value;
127  value.x = x;
128  value.y = y;
129  value.vote = v;
130  values_.push_back(value);
131  finalized_ = false;
132  }
133 
134  float EstimateYFor(float x, float r) {
135  ASSERT_HOST(finalized_);
136  int start = 0, end = values_.size();
137  // Because the number of samples (used_) is assumed to be small,
138  // just use linear search to find values within the range.
139  while (start < values_.size() && values_[start].x < x * (1.0 - r)) start++;
140  while (end - 1 >= 0 && values_[end - 1].x > x * (1.0 + r)) end--;
141 
142  // Fall back to the global average if there are no data within r
143  // of x.
144  if (start >= end) {
145  start = 0;
146  end = values_.size();
147  }
148 
149  // Compute weighted average of the values.
150  float rc = 0;
151  int vote = 0;
152  for (int i = start; i < end; i++) {
153  rc += values_[i].vote * x * values_[i].y / values_[i].x;
154  vote += values_[i].vote;
155  }
156 
157  return rc / vote;
158  }
159 
160  private:
161  static int float_pair_compare(const void* a, const void* b) {
162  const float_pair* f_a = reinterpret_cast<const float_pair*>(a);
163  const float_pair* f_b = reinterpret_cast<const float_pair*>(b);
164  return (f_a->x > f_b->x) ? 1 : ((f_a->x < f_b->x) ? -1 : 0);
165  }
166 
167  bool finalized_;
169 };
170 
171 // Class to represent a character on a fixed pitch row. A FPChar may
172 // consist of multiple blobs (BLOBNBOX's).
173 class FPChar {
174  public:
175  enum Alignment {
176  ALIGN_UNKNOWN, ALIGN_GOOD, ALIGN_BAD
177  };
178 
179  FPChar(): box_(), real_body_(),
180  from_(NULL), to_(NULL), num_blobs_(0), max_gap_(0),
181  final_(false), alignment_(ALIGN_UNKNOWN),
182  merge_to_prev_(false), delete_flag_(false) {
183  }
184 
185  // Initialize from blob.
186  void Init(BLOBNBOX *blob) {
187  box_ = blob->bounding_box();
188  real_body_ = box_;
189  from_ = to_ = blob;
190  num_blobs_ = 1;
191  }
192 
193  // Merge this character with "next". The "next" character should
194  // consist of succeeding blobs on the same row.
195  void Merge(const FPChar &next) {
196  int gap = real_body_.x_gap(next.real_body_);
197  if (gap > max_gap_) max_gap_ = gap;
198 
199  box_ += next.box_;
200  real_body_ += next.real_body_;
201  to_ = next.to_;
202  num_blobs_ += next.num_blobs_;
203  }
204 
205  // Accessors.
206  const TBOX &box() const { return box_; }
207  void set_box(const TBOX &box) {
208  box_ = box;
209  }
210  const TBOX &real_body() const { return real_body_; }
211 
212  bool is_final() const { return final_; }
213  void set_final(bool flag) {
214  final_ = flag;
215  }
216 
217  const Alignment& alignment() const {
218  return alignment_;
219  }
220  void set_alignment(Alignment alignment) {
221  alignment_ = alignment;
222  }
223 
224  bool merge_to_prev() const {
225  return merge_to_prev_;
226  }
227  void set_merge_to_prev(bool flag) {
228  merge_to_prev_ = flag;
229  }
230 
231  bool delete_flag() const {
232  return delete_flag_;
233  }
234  void set_delete_flag(bool flag) {
235  delete_flag_ = flag;
236  }
237 
238  int max_gap() const {
239  return max_gap_;
240  }
241 
242  int num_blobs() const {
243  return num_blobs_;
244  }
245 
246  private:
247  TBOX box_; // Rectangle region considered to be occupied by this
248  // character. It could be bigger than the bounding box.
249  TBOX real_body_; // Real bounding box of this character.
250  BLOBNBOX *from_; // The first blob of this character.
251  BLOBNBOX *to_; // The last blob of this character.
252  int num_blobs_; // Number of blobs that belong to this character.
253  int max_gap_; // Maximum x gap between the blobs.
254 
255  bool final_; // True if alignment/fragmentation decision for this
256  // character is finalized.
257 
258  Alignment alignment_; // Alignment status.
259  bool merge_to_prev_; // True if this is a fragmented blob that
260  // needs to be merged to the previous
261  // character.
262 
263  int delete_flag_; // True if this character is merged to another
264  // one and needs to be deleted.
265 };
266 
267 // Class to represent a fixed pitch row, as a linear collection of
268 // FPChar's.
269 class FPRow {
270  public:
271  FPRow() : pitch_(0.0f), estimated_pitch_(0.0f),
272  all_pitches_(), all_gaps_(), good_pitches_(), good_gaps_(),
273  heights_(), characters_(), real_row_(NULL) {
274  }
275 
276  ~FPRow() { }
277 
278  // Initialize from TD_ROW.
279  void Init(TO_ROW *row);
280 
281  // Estimate character pitch of this row, based on current alignment
282  // status of underlying FPChar's. The argument pass1 can be set to
283  // true if the function is called after Pass1Analyze(), to eliminate
284  // some redundant computation.
285  void EstimatePitch(bool pass1);
286 
287  // Check each character if it has good character pitches between its
288  // predecessor and its successor and set its alignment status. If
289  // we already calculated the estimated pitch for this row, the value
290  // is used. If we didn't, a character is considered to be good, if
291  // the pitches between its predecessor and its successor are almost
292  // equal.
293  void Pass1Analyze();
294 
295  // Find characters that fit nicely into one imaginary body next to a
296  // character which is already finalized. Then mark them as character
297  // fragments.
298  bool Pass2Analyze();
299 
300  // Merge FPChars marked as character fragments into one.
301  void MergeFragments();
302 
303  // Finalize characters that are already large enough and cannot be
304  // merged with others any more.
305  void FinalizeLargeChars();
306 
307  // Ouput pitch estimation results to attributes of TD_ROW.
308  void OutputEstimations();
309 
310  void DebugOutputResult(int row_index);
311 
312  int good_pitches() {
313  return good_pitches_.size();
314  }
315 
316  int good_gaps() {
317  return good_gaps_.size();
318  }
319 
320  float pitch() {
321  return pitch_;
322  }
323 
324  float estimated_pitch() {
325  return estimated_pitch_;
326  }
327 
328  void set_estimated_pitch(float v) {
329  estimated_pitch_ = v;
330  }
331 
332  float height() {
333  return height_;
334  }
335 
336  float height_pitch_ratio() {
337  if (good_pitches_.size() < 2) return -1.0;
338  return height_ / good_pitches_.median();
339  }
340 
341  float gap() {
342  return gap_;
343  }
344 
345  int num_chars() {
346  return characters_.size();
347  }
348  FPChar *character(int i) {
349  return &characters_[i];
350  }
351 
352  const TBOX &box(int i) {
353  return characters_[i].box();
354  }
355 
356  const TBOX &real_body(int i) {
357  return characters_[i].real_body();
358  }
359 
360  bool is_box_modified(int i) {
361  return !(characters_[i].box() == characters_[i].real_body());
362  }
363 
364  float center_x(int i) {
365  return (characters_[i].box().left() + characters_[i].box().right()) / 2.0;
366  }
367 
368  bool is_final(int i) {
369  return characters_[i].is_final();
370  }
371 
372  void finalize(int i) {
373  characters_[i].set_final(true);
374  }
375 
376  bool is_good(int i) {
377  return characters_[i].alignment() == FPChar::ALIGN_GOOD;
378  }
379 
380  bool is_bad(int i) {
381  return characters_[i].alignment() == FPChar::ALIGN_BAD;
382  }
383 
384  bool is_unknown(int i) {
385  return characters_[i].alignment() == FPChar::ALIGN_UNKNOWN;
386  }
387 
388  void mark_good(int i) {
389  characters_[i].set_alignment(FPChar::ALIGN_GOOD);
390  }
391 
392  void mark_bad(int i) {
393  characters_[i].set_alignment(FPChar::ALIGN_BAD);
394  }
395 
396  void clear_alignment(int i) {
397  characters_[i].set_alignment(FPChar::ALIGN_UNKNOWN);
398  }
399 
400  private:
401  static float x_overlap_fraction(const TBOX& box1, const TBOX& box2) {
402  if (MIN(box1.width(), box2.width()) == 0) return 0.0;
403  return -box1.x_gap(box2) / (float)MIN(box1.width(), box2.width());
404  }
405 
406  static bool mostly_overlap(const TBOX& box1, const TBOX& box2) {
407  return x_overlap_fraction(box1, box2) > 0.9;
408  }
409 
410  static bool significant_overlap(const TBOX& box1, const TBOX& box2) {
411  if (MIN(box1.width(), box2.width()) == 0) return false;
412  int overlap = -box1.x_gap(box2);
413  return overlap > 1 || x_overlap_fraction(box1, box2) > 0.1;
414  }
415 
416  static float box_pitch(const TBOX& ref, const TBOX& box) {
417  return abs(ref.left() + ref.right() - box.left() - box.right()) / 2.0;
418  }
419 
420  // Check if two neighboring characters satisfy the fixed pitch model.
421  static bool is_good_pitch(float pitch, const TBOX& box1, const TBOX& box2) {
422  // Character box shouldn't exceed pitch.
423  if (box1.width() >= pitch * (1.0 + kFPTolerance) ||
424  box2.width() >= pitch * (1.0 + kFPTolerance) ||
425  box1.height() >= pitch * (1.0 + kFPTolerance) ||
426  box2.height() >= pitch * (1.0 + kFPTolerance)) return false;
427 
428  const float real_pitch = box_pitch(box1, box2);
429  if (fabs(real_pitch - pitch) < pitch * kFPTolerance) return true;
430 
432  // Hangul characters usually have fixed pitch, but words are
433  // delimited by space which can be narrower than characters.
434  if (real_pitch > pitch && real_pitch < pitch * 2.0 &&
435  real_pitch - box1.x_gap(box2) < pitch) {
436  return true;
437  }
438  }
439  return false;
440  }
441 
442  static bool is_interesting_blob(const BLOBNBOX *blob) {
443  return !blob->joined_to_prev() && blob->flow() != BTFT_LEADER;
444  }
445 
446  // Cleanup chars that are already merged to others.
447  void DeleteChars() {
448  int index = 0;
449  for (int i = 0; i < characters_.size(); ++i) {
450  if (!characters_[i].delete_flag()) {
451  if (index != i) characters_[index] = characters_[i];
452  index++;
453  }
454  }
455  characters_.truncate(index);
456  }
457 
458  float pitch_; // Character pitch.
459  float estimated_pitch_; // equal to pitch_ if pitch_ is considered
460  // to be good enough.
461  float height_; // Character height.
462  float gap_; // Minimum gap between characters.
463 
464  // Pitches between any two successive characters.
465  SimpleStats all_pitches_;
466  // Gaps between any two successive characters.
467  SimpleStats all_gaps_;
468  // Pitches between any two successive characters that are consistent
469  // with the fixed pitch model.
470  SimpleStats good_pitches_;
471  // Gaps between any two successive characters that are consistent
472  // with the fixed pitch model.
473  SimpleStats good_gaps_;
474 
475  SimpleStats heights_;
476 
477  GenericVector<FPChar> characters_;
478  TO_ROW *real_row_; // Underlying TD_ROW for this row.
479 };
480 
481 void FPRow::Init(TO_ROW *row) {
482  ASSERT_HOST(row != NULL);
483  ASSERT_HOST(row->xheight > 0);
484  real_row_ = row;
485  real_row_->pitch_decision = PITCH_CORR_PROP; // Default decision.
486 
487  BLOBNBOX_IT blob_it = row->blob_list();
488  // Initialize characters_ and compute the initial estimation of
489  // character height.
490  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
491  if (is_interesting_blob(blob_it.data())) {
492  FPChar fp_char;
493  fp_char.Init(blob_it.data());
494  // Merge unconditionally if two blobs overlap.
495  if (!characters_.empty() &&
496  significant_overlap(fp_char.box(), characters_.back().box())) {
497  characters_.back().Merge(fp_char);
498  } else {
499  characters_.push_back(fp_char);
500  }
501  TBOX bound = blob_it.data()->bounding_box();
502  if (bound.height() * 3.0 > bound.width()) {
503  heights_.Add(bound.height());
504  }
505  }
506  }
507  heights_.Finish();
508  height_ = heights_.ile(0.875);
509 }
510 
511 void FPRow::OutputEstimations() {
512  if (good_pitches_.size() == 0) {
513  pitch_ = 0.0f;
514  real_row_->pitch_decision = PITCH_CORR_PROP;
515  return;
516  }
517 
518  pitch_ = good_pitches_.median();
519  real_row_->fixed_pitch = pitch_;
520  // good_gaps_.ile(0.125) can be large if most characters on the row
521  // are skinny. Use pitch_ - height_ instead if it's smaller, but
522  // positive.
523  real_row_->kern_size = real_row_->pr_nonsp =
524  MIN(good_gaps_.ile(0.125), MAX(pitch_ - height_, 0));
525  real_row_->body_size = pitch_ - real_row_->kern_size;
526 
527  if (good_pitches_.size() < all_pitches_.size() * kFixedPitchThreshold) {
528  // If more than half of the characters of a line don't fit to the
529  // fixed pitch model, consider the line to be propotional. 50%
530  // seems to be a good threshold in practice as well.
531  // Anyway we store estimated values (fixed_pitch, kern_size, etc.) in
532  // real_row_ as a partial estimation result and try to use them in the
533  // normalization process.
534  real_row_->pitch_decision = PITCH_CORR_PROP;
535  return;
536  } else if (good_pitches_.size() > all_pitches_.size() * 0.75) {
537  real_row_->pitch_decision = PITCH_DEF_FIXED;
538  } else {
539  real_row_->pitch_decision = PITCH_CORR_FIXED;
540  }
541 
542  real_row_->space_size = real_row_->pr_space = pitch_;
543  // Set min_space to 50% of character pitch so that we can break CJK
544  // text at a half-width space after punctuation.
545  real_row_->min_space = (pitch_ + good_gaps_.minimum()) * 0.5;
546 
547  // Don't consider a quarter space as a real space, because it's used
548  // for line justification in traditional Japanese books.
549  real_row_->max_nonspace = MAX(pitch_ * 0.25 + good_gaps_.minimum(),
550  (double)good_gaps_.ile(0.875));
551 
552  int space_threshold =
553  MIN((real_row_->max_nonspace + real_row_->min_space) / 2,
554  real_row_->xheight);
555 
556  // Make max_nonspace larger than any intra-character gap so that
557  // make_prop_words() won't break a row at the middle of a character.
558  for (int i = 0; i < num_chars(); ++i) {
559  if (characters_[i].max_gap() > real_row_->max_nonspace) {
560  real_row_->max_nonspace = characters_[i].max_gap();
561  }
562  }
563  real_row_->space_threshold =
564  MIN((real_row_->max_nonspace + real_row_->min_space) / 2,
565  real_row_->xheight);
566  real_row_->used_dm_model = false;
567 
568  // Setup char_cells.
569  ICOORDELT_IT cell_it = &real_row_->char_cells;
570  ICOORDELT *cell = new ICOORDELT(real_body(0).left(), 0);
571  cell_it.add_after_then_move(cell);
572 
573  int right = real_body(0).right();
574  for (int i = 1; i < num_chars(); ++i) {
575  // Put a word break if gap between two characters is bigger than
576  // space_threshold. Don't break if none of two characters
577  // couldn't be "finalized", because maybe they need to be merged
578  // to one character.
579  if ((is_final(i - 1) || is_final(i)) &&
580  real_body(i - 1).x_gap(real_body(i)) > space_threshold) {
581  cell = new ICOORDELT(right + 1, 0);
582  cell_it.add_after_then_move(cell);
583  while (right + pitch_ < box(i).left()) {
584  right += pitch_;
585  cell = new ICOORDELT(right + 1, 0);
586  cell_it.add_after_then_move(cell);
587  }
588  right = box(i).left();
589  }
590  cell = new ICOORDELT((right + real_body(i).left()) / 2, 0);
591  cell_it.add_after_then_move(cell);
592  right = real_body(i).right();
593  }
594 
595  cell = new ICOORDELT(right + 1, 0);
596  cell_it.add_after_then_move(cell);
597 
598  // TODO(takenaka): add code to store alignment/fragmentation
599  // information to blobs so that it can be reused later, e.g. in
600  // recognition phase.
601 }
602 
603 void FPRow::EstimatePitch(bool pass1) {
604  good_pitches_.Clear();
605  all_pitches_.Clear();
606  good_gaps_.Clear();
607  all_gaps_.Clear();
608  heights_.Clear();
609  if (num_chars() == 0) return;
610 
611  inT32 cx0, cx1;
612  bool prev_was_good = is_good(0);
613  cx0 = center_x(0);
614 
615  heights_.Add(box(0).height());
616  for (int i = 1; i < num_chars(); i++) {
617  cx1 = center_x(i);
618  inT32 pitch = cx1 - cx0;
619  inT32 gap = MAX(0, real_body(i - 1).x_gap(real_body(i)));
620 
621  heights_.Add(box(i).height());
622  // Ignore if the pitch is too close. But don't ignore wide pitch
623  // may be the result of large tracking.
624  if (pitch > height_ * 0.5) {
625  all_pitches_.Add(pitch);
626  all_gaps_.Add(gap);
627  if (is_good(i)) {
628  // In pass1 (after Pass1Analyze()), all characters marked as
629  // "good" have a good consistent pitch with their previous
630  // characters. However, it's not true in pass2 and a good
631  // character may have a good pitch only between its successor.
632  // So we collect only pitch values between two good
633  // characters. and within tolerance in pass2.
634  if (pass1 || (prev_was_good &&
635  fabs(estimated_pitch_ - pitch) <
636  kFPTolerance * estimated_pitch_)) {
637  good_pitches_.Add(pitch);
638  if (!is_box_modified(i - 1) && !is_box_modified(i)) {
639  good_gaps_.Add(gap);
640  }
641  }
642  prev_was_good = true;
643  } else {
644  prev_was_good = false;
645  }
646  }
647  cx0 = cx1;
648  }
649 
650  good_pitches_.Finish();
651  all_pitches_.Finish();
652  good_gaps_.Finish();
653  all_gaps_.Finish();
654  heights_.Finish();
655 
656  height_ = heights_.ile(0.875);
657  if (all_pitches_.size() == 0) {
658  pitch_ = 0.0f;
659  gap_ = 0.0f;
660  } else if (good_pitches_.size() < 2) {
661  // We don't have enough data to estimate the pitch of this row yet.
662  // Use median of all pitches as the initial guess.
663  pitch_ = all_pitches_.median();
664  ASSERT_HOST(pitch_ > 0.0f);
665  gap_ = all_gaps_.ile(0.125);
666  } else {
667  pitch_ = good_pitches_.median();
668  ASSERT_HOST(pitch_ > 0.0f);
669  gap_ = good_gaps_.ile(0.125);
670  }
671 }
672 
673 void FPRow::DebugOutputResult(int row_index) {
674  if (num_chars() > 0) {
675  tprintf("Row %d: pitch_decision=%d, fixed_pitch=%f, max_nonspace=%d, "
676  "space_size=%f, space_threshold=%d, xheight=%f\n",
677  row_index, (int)(real_row_->pitch_decision),
678  real_row_->fixed_pitch, real_row_->max_nonspace,
679  real_row_->space_size, real_row_->space_threshold,
680  real_row_->xheight);
681 
682  for (int i = 0; i < num_chars(); i++) {
683  tprintf("Char %d: is_final=%d is_good=%d num_blobs=%d: ",
684  i, is_final(i), is_good(i), character(i)->num_blobs());
685  box(i).print();
686  }
687  }
688 }
689 
690 void FPRow::Pass1Analyze() {
691  if (num_chars() < 2) return;
692 
693  if (estimated_pitch_ > 0.0f) {
694  for (int i = 2; i < num_chars(); i++) {
695  if (is_good_pitch(estimated_pitch_, box(i - 2), box(i-1)) &&
696  is_good_pitch(estimated_pitch_, box(i - 1), box(i))) {
697  mark_good(i - 1);
698  }
699  }
700  } else {
701  for (int i = 2; i < num_chars(); i++) {
702  if (is_good_pitch(box_pitch(box(i-2), box(i-1)), box(i - 1), box(i))) {
703  mark_good(i - 1);
704  }
705  }
706  }
707  character(0)->set_alignment(character(1)->alignment());
708  character(num_chars() - 1)->set_alignment(
709  character(num_chars() - 2)->alignment());
710 }
711 
712 bool FPRow::Pass2Analyze() {
713  bool changed = false;
714  if (num_chars() <= 1 || estimated_pitch_ == 0.0f) {
715  return false;
716  }
717  for (int i = 0; i < num_chars(); i++) {
718  if (is_final(i)) continue;
719 
720  FPChar::Alignment alignment = character(i)->alignment();
721  bool intersecting = false;
722  bool not_intersecting = false;
723 
724  if (i < num_chars() - 1 && is_final(i + 1)) {
725  // Next character is already finalized. Estimate the imaginary
726  // body including this character based on the character. Skip
727  // whitespace if necessary.
728  bool skipped_whitespaces = false;
729  float c1 = center_x(i + 1) - 1.5 * estimated_pitch_;
730  while (c1 > box(i).right()) {
731  skipped_whitespaces = true;
732  c1 -= estimated_pitch_;
733  }
734  TBOX ibody(c1, box(i).bottom(), c1 + estimated_pitch_, box(i).top());
735 
736  // Collect all characters that mostly fit in the region.
737  // Also, their union height shouldn't be too big.
738  int j = i;
739  TBOX merged;
740  while (j >= 0 && !is_final(j) && mostly_overlap(ibody, box(j)) &&
741  merged.bounding_union(box(j)).height() <
742  estimated_pitch_ * (1 + kFPTolerance)) {
743  merged += box(j);
744  j--;
745  }
746 
747  if (j >= 0 && significant_overlap(ibody, box(j))) {
748  // character(j) lies on the character boundary and doesn't fit
749  // well into the imaginary body.
750  if (!is_final(j)) intersecting = true;
751  } else {
752  not_intersecting = true;
753  if (i - j > 0) {
754  // Merge character(j+1) ... character(i) because they fit
755  // into the body nicely.
756  if (i - j == 1) {
757  // Only one char in the imaginary body.
758  if (!skipped_whitespaces) mark_good(i);
759  // set ibody as bounding box of this character to get
760  // better pitch analysis result for halfwidth glyphs
761  // followed by a halfwidth space.
762  if (box(i).width() <= estimated_pitch_ * 0.5) {
763  ibody += box(i);
764  character(i)->set_box(ibody);
765  }
766  character(i)->set_merge_to_prev(false);
767  finalize(i);
768  } else {
769  for (int k = i; k > j + 1; k--) {
770  character(k)->set_merge_to_prev(true);
771  }
772  }
773  }
774  }
775  }
776  if (i > 0 && is_final(i - 1)) {
777  // Now we repeat everything from the opposite side. Previous
778  // character is already finalized. Estimate the imaginary body
779  // including this character based on the character.
780  bool skipped_whitespaces = false;
781  float c1 = center_x(i - 1) + 1.5 * estimated_pitch_;
782  while (c1 < box(i).left()) {
783  skipped_whitespaces = true;
784  c1 += estimated_pitch_;
785  }
786  TBOX ibody(c1 - estimated_pitch_, box(i).bottom(), c1, box(i).top());
787 
788  int j = i;
789  TBOX merged;
790  while (j < num_chars() && !is_final(j) && mostly_overlap(ibody, box(j)) &&
791  merged.bounding_union(box(j)).height() <
792  estimated_pitch_ * (1 + kFPTolerance)) {
793  merged += box(j);
794  j++;
795  }
796 
797  if (j < num_chars() && significant_overlap(ibody, box(j))) {
798  if (!is_final(j)) intersecting = true;
799  } else {
800  not_intersecting = true;
801  if (j - i > 0) {
802  if (j - i == 1) {
803  if (!skipped_whitespaces) mark_good(i);
804  if (box(i).width() <= estimated_pitch_ * 0.5) {
805  ibody += box(i);
806  character(i)->set_box(ibody);
807  }
808  character(i)->set_merge_to_prev(false);
809  finalize(i);
810  } else {
811  for (int k = i + 1; k < j; k++) {
812  character(k)->set_merge_to_prev(true);
813  }
814  }
815  }
816  }
817  }
818 
819  // This character doesn't fit well into the estimated imaginary
820  // bodies. Mark it as bad.
821  if (intersecting && !not_intersecting) mark_bad(i);
822  if (character(i)->alignment() != alignment ||
823  character(i)->merge_to_prev()) {
824  changed = true;
825  }
826  }
827 
828  return changed;
829 }
830 
831 void FPRow::MergeFragments() {
832  int last_char = 0;
833 
834  for (int j = 0; j < num_chars(); ++j) {
835  if (character(j)->merge_to_prev()) {
836  character(last_char)->Merge(*character(j));
837  character(j)->set_delete_flag(true);
838  clear_alignment(last_char);
839  character(j-1)->set_merge_to_prev(false);
840  } else {
841  last_char = j;
842  }
843  }
844  DeleteChars();
845 }
846 
847 void FPRow::FinalizeLargeChars() {
848  float row_pitch = estimated_pitch();
849  for (int i = 0; i < num_chars(); i++) {
850  if (is_final(i)) continue;
851 
852  // Finalize if both neighbors are finalized. We have no other choice.
853  if (i > 0 && is_final(i - 1) && i < num_chars() - 1 && is_final(i + 1)) {
854  finalize(i);
855  continue;
856  }
857 
858  float cx = center_x(i);
859  TBOX ibody(cx - 0.5 * row_pitch, 0, cx + 0.5 * row_pitch, 1);
860  if (i > 0) {
861  // The preceding character significantly intersects with the
862  // imaginary body of this character. Let Pass2Analyze() handle
863  // this case.
864  if (x_overlap_fraction(ibody, box(i - 1)) > 0.1) continue;
865  if (!is_final(i - 1)) {
866  TBOX merged = box(i);
867  merged += box(i - 1);
868  if (merged.width() < row_pitch) continue;
869  // This character cannot be finalized yet because it can be
870  // merged with the previous one. Again, let Pass2Analyze()
871  // handle this case.
872  }
873  }
874  if (i < num_chars() - 1) {
875  if (x_overlap_fraction(ibody, box(i + 1)) > 0.1) continue;
876  if (!is_final(i + 1)) {
877  TBOX merged = box(i);
878  merged += box(i + 1);
879  if (merged.width() < row_pitch) continue;
880  }
881  }
882  finalize(i);
883  }
884 
885  // Update alignment decision. We only consider finalized characters
886  // in pass2. E.g. if a finalized character C has another finalized
887  // character L on its left and a not-finalized character R on its
888  // right, we mark C as good if the pitch between C and L is good,
889  // regardless of the pitch between C and R.
890  for (int i = 0; i < num_chars(); i++) {
891  if (!is_final(i)) continue;
892  bool good_pitch = false;
893  bool bad_pitch = false;
894  if (i > 0 && is_final(i - 1)) {
895  if (is_good_pitch(row_pitch, box(i - 1), box(i))) {
896  good_pitch = true;
897  } else {
898  bad_pitch = true;
899  }
900  }
901  if (i < num_chars() - 1 && is_final(i + 1)) {
902  if (is_good_pitch(row_pitch, box(i), box(i + 1))) {
903  good_pitch = true;
904  } else {
905  bad_pitch = true;
906  }
907  }
908  if (good_pitch && !bad_pitch) mark_good(i);
909  else if (!good_pitch && bad_pitch) mark_bad(i);
910  }
911 }
912 
913 class FPAnalyzer {
914  public:
915  FPAnalyzer(): page_tr_(), rows_() { }
916  ~FPAnalyzer() { }
917 
918  void Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks);
919 
920  void Pass1Analyze() {
921  for (int i = 0; i < rows_.size(); i++) rows_[i].Pass1Analyze();
922  }
923 
924  // Estimate character pitch for each row. The argument pass1 can be
925  // set to true if the function is called after Pass1Analyze(), to
926  // eliminate some redundant computation.
927  void EstimatePitch(bool pass1);
928 
929  bool maybe_fixed_pitch() {
930  if (rows_.empty() ||
931  rows_.size() <= num_bad_rows_ + num_tall_rows_ + 1) return false;
932  return true;
933  }
934 
935  void MergeFragments() {
936  for (int i = 0; i < rows_.size(); i++) rows_[i].MergeFragments();
937  }
938 
939  void FinalizeLargeChars() {
940  for (int i = 0; i < rows_.size(); i++) rows_[i].FinalizeLargeChars();
941  }
942 
943  bool Pass2Analyze() {
944  bool changed = false;
945  for (int i = 0; i < rows_.size(); i++) {
946  if (rows_[i].Pass2Analyze()) {
947  changed = true;
948  }
949  }
950  return changed;
951  }
952 
953  void OutputEstimations() {
954  for (int i = 0; i < rows_.size(); i++) rows_[i].OutputEstimations();
955  // Don't we need page-level estimation of gaps/spaces?
956  }
957 
958  void DebugOutputResult() {
959  tprintf("FPAnalyzer: final result\n");
960  for (int i = 0; i < rows_.size(); i++) rows_[i].DebugOutputResult(i);
961  }
962 
963  int num_rows() {
964  return rows_.size();
965  }
966 
967  // Returns the upper limit for pass2 loop iteration.
968  int max_iteration() {
969  // We're fixing at least one character per iteration. So basically
970  // we shouldn't require more than max_chars_per_row_ iterations.
971  return max_chars_per_row_ + 100;
972  }
973 
974  private:
975  ICOORD page_tr_;
976  GenericVector<FPRow> rows_;
977  int num_tall_rows_;
978  int num_bad_rows_;
979  int num_empty_rows_;
980  int max_chars_per_row_;
981 };
982 
983 void FPAnalyzer::Init(ICOORD page_tr, TO_BLOCK_LIST *port_blocks) {
984  page_tr_ = page_tr;
985 
986  TO_BLOCK_IT block_it;
987  block_it.set_to_list (port_blocks);
988 
989  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
990  block_it.forward()) {
991  TO_BLOCK *block = block_it.data();
992  if (!block->get_rows()->empty()) {
993  ASSERT_HOST(block->xheight > 0);
994  find_repeated_chars(block, FALSE);
995  }
996  }
997 
998  num_empty_rows_ = 0;
999  max_chars_per_row_ = 0;
1000  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
1001  block_it.forward()) {
1002  TO_ROW_IT row_it = block_it.data()->get_rows();
1003  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1004  FPRow row;
1005  row.Init(row_it.data());
1006  rows_.push_back(row);
1007  int num_chars = rows_.back().num_chars();
1008  if (num_chars <= 1) num_empty_rows_++;
1009  if (num_chars > max_chars_per_row_) max_chars_per_row_ = num_chars;
1010  }
1011  }
1012 }
1013 
1014 void FPAnalyzer::EstimatePitch(bool pass1) {
1015  LocalCorrelation pitch_height_stats;
1016 
1017  num_tall_rows_ = 0;
1018  num_bad_rows_ = 0;
1019  pitch_height_stats.Clear();
1020  for (int i = 0; i < rows_.size(); i++) {
1021  rows_[i].EstimatePitch(pass1);
1022  if (rows_[i].good_pitches()) {
1023  pitch_height_stats.Add(rows_[i].height() + rows_[i].gap(),
1024  rows_[i].pitch(), rows_[i].good_pitches());
1025  if (rows_[i].height_pitch_ratio() > 1.1) num_tall_rows_++;
1026  } else {
1027  num_bad_rows_++;
1028  }
1029  }
1030 
1031  pitch_height_stats.Finish();
1032  for (int i = 0; i < rows_.size(); i++) {
1033  if (rows_[i].good_pitches() >= 5) {
1034  // We have enough evidences. Just use the pitch estimation
1035  // from this row.
1036  rows_[i].set_estimated_pitch(rows_[i].pitch());
1037  } else if (rows_[i].num_chars() > 1) {
1038  float estimated_pitch =
1039  pitch_height_stats.EstimateYFor(rows_[i].height() + rows_[i].gap(),
1040  0.1);
1041  // CJK characters are more likely to be fragmented than poorly
1042  // chopped. So trust the page-level estimation of character
1043  // pitch only if it's larger than row-level estimation or
1044  // row-level estimation is too large (2x bigger than row height).
1045  if (estimated_pitch > rows_[i].pitch() ||
1046  rows_[i].pitch() > rows_[i].height() * 2.0) {
1047  rows_[i].set_estimated_pitch(estimated_pitch);
1048  } else {
1049  rows_[i].set_estimated_pitch(rows_[i].pitch());
1050  }
1051  }
1052  }
1053 }
1054 
1055 } // namespace
1056 
1058  TO_BLOCK_LIST *port_blocks) {
1059  FPAnalyzer analyzer;
1060  analyzer.Init(page_tr, port_blocks);
1061  if (analyzer.num_rows() == 0) return;
1062 
1063  analyzer.Pass1Analyze();
1064  analyzer.EstimatePitch(true);
1065 
1066  // Perform pass1 analysis again with the initial estimation of row
1067  // pitches, for better estimation.
1068  analyzer.Pass1Analyze();
1069  analyzer.EstimatePitch(true);
1070 
1071  // Early exit if the page doesn't seem to contain fixed pitch rows.
1072  if (!analyzer.maybe_fixed_pitch()) {
1074  tprintf("Page doesn't seem to contain fixed pitch rows\n");
1075  }
1076  return;
1077  }
1078 
1079  int iteration = 0;
1080  do {
1081  analyzer.MergeFragments();
1082  analyzer.FinalizeLargeChars();
1083  analyzer.EstimatePitch(false);
1084  iteration++;
1085  } while (analyzer.Pass2Analyze() && iteration < analyzer.max_iteration());
1086 
1088  tprintf("compute_fixed_pitch_cjk finished after %d iteration (limit=%d)\n",
1089  iteration, analyzer.max_iteration());
1090  }
1091 
1092  analyzer.OutputEstimations();
1093  if (textord_debug_pitch_test) analyzer.DebugOutputResult();
1094 }
TBOX bounding_union(const TBOX &box) const
Definition: rect.cpp:129
const TBOX & bounding_box() const
Definition: blobbox.h:215
integer coordinate
Definition: points.h:30
PITCH_TYPE pitch_decision
Definition: blobbox.h:646
inT16 width() const
Definition: rect.h:111
#define MIN(x, y)
Definition: ndminx.h:28
BlobTextFlowType flow() const
Definition: blobbox.h:280
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool joined_to_prev() const
Definition: blobbox.h:241
void compute_fixed_pitch_cjk(ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
Definition: cjkpitch.cpp:1057
#define FALSE
Definition: capi.h:46
float xheight
Definition: blobbox.h:784
inT16 left() const
Definition: rect.h:68
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void print() const
Definition: rect.h:270
int x_gap(const TBOX &box) const
Definition: rect.h:217
inT16 height() const
Definition: rect.h:104
#define MAX(x, y)
Definition: ndminx.h:24
int inT32
Definition: host.h:35
#define tprintf(...)
Definition: tprintf.h:31
float xheight
Definition: blobbox.h:653
EXTERN bool textord_debug_pitch_test
Definition: topitch.cpp:44
bool textord_space_size_is_variable
Definition: cjkpitch.cpp:28
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void find_repeated_chars(TO_BLOCK *block, BOOL8 testing_on)
Definition: topitch.cpp:1756
#define BOOL_VAR(name, val, comment)
Definition: params.h:280
#define ASSERT_HOST(x)
Definition: errcode.h:84