tesseract  3.05.02
makerow.cpp File Reference
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"

Go to the source code of this file.

Namespaces

 tesseract
 

Macros

#define MAX_HEIGHT_MODES   12
 

Functions

float MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
 
make_single_row

Arrange the blobs into a single row... well actually, if there is only a single blob, it makes 2 rows, in case the top-level blob is a container of the real blobs to recognize.

float make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks)
 
make_rows

Arrange the blobs into rows.

float make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks)
 
make_initial_textrows

Arrange the good blobs into rows of text.

void make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_lms_line

Fit an LMS line to a row.

void fit_lms_line (TO_ROW *row)
 
find_best_dropout_row

Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted.

BOOL8 find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
 
deskew_block_coords

Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it.

TBOX deskew_block_coords (TO_BLOCK *block, float gradient)
 
compute_line_occupation

Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative.

void compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
 
void compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
 
compute_dropout_distances

Compute the distance from each coordinate to the nearest dropout.

void compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count)
 
expand_rows

Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so.

void expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void adjust_row_limits (TO_BLOCK *block)
 
compute_row_stats

Compute the linespacing and offset.

void compute_row_stats (TO_BLOCK *block, BOOL8 testing_on)
 
fill_heights

Fill the given heights with heights of the blobs that are legal candidates for estimating xheight.

void fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights)
 
compute_xheight_from_modes

Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly; otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only, then force finding of only the top mode.

int compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise)
 
compute_row_descdrop

Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height; otherwise it returns 0.

inT32 compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights)
 
compute_height_modes

Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred.

inT32 compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
 
correct_row_xheight

Adjust the xheight etc of this row if not within reasonable limits of the average for the block.

void correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop)
 
separate_underlines

Test wide objects for being potential underlines. If they are then put them in a separate list in the block.

void separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on)
 
pre_associate_blobs

Associate overlapping blobs and fake chop wide blobs.

void pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
 
fit_parallel_rows

Re-fit the rows in the block to the given gradient.

void fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
fit_parallel_lms

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void fit_parallel_lms (float gradient, TO_ROW *row)
 
make_baseline_spline

Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly.

void make_baseline_spline (TO_ROW *row, TO_BLOCK *block)
 
segment_baseline

Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic.

BOOL8 segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
linear_spline_baseline

Divide the baseline up into segments which require a different quadratic fitted to them.

Returns
TRUE if enough blobs were far enough away to need a quadratic.
double * linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
 
assign_blobs_to_rows

Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it.

void assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
 
most_overlapping_row

Return the row which most overlaps the blob.

OVERLAP_STATE most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
 
blob_x_order

Sort function to sort blobs in x from page left.

int blob_x_order (const void *item1, const void *item2)
 
row_y_order

Sort function to sort rows in y from page top.

int row_y_order (const void *item1, const void *item2)
 
row_spacing_order

Qsort style function to compare 2 TO_ROWS based on their spacing value.

int row_spacing_order (const void *item1, const void *item2)
 
mark_repeated_chars

Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX.

void mark_repeated_chars (TO_ROW *row)
 

Variables

bool textord_heavy_nr = FALSE
 
bool textord_show_initial_rows = FALSE
 
bool textord_show_parallel_rows = FALSE
 
bool textord_show_expanded_rows = FALSE
 
bool textord_show_final_rows = FALSE
 
bool textord_show_final_blobs = FALSE
 
bool textord_test_landscape = FALSE
 
bool textord_parallel_baselines = TRUE
 
bool textord_straight_baselines = FALSE
 
bool textord_old_baselines = TRUE
 
bool textord_old_xheight = FALSE
 
bool textord_fix_xheight_bug = TRUE
 
bool textord_fix_makerow_bug = TRUE
 
bool textord_debug_xheights = FALSE
 
bool textord_biased_skewcalc = TRUE
 
bool textord_interpolating_skew = TRUE
 
int textord_skewsmooth_offset = 4
 
int textord_skewsmooth_offset2 = 1
 
int textord_test_x = -MAX_INT32
 
int textord_test_y = -MAX_INT32
 
int textord_min_blobs_in_row = 4
 
int textord_spline_minblobs = 8
 
int textord_spline_medianwin = 6
 
int textord_max_blob_overlaps = 4
 
int textord_min_xheight = 10
 
double textord_spline_shift_fraction = 0.02
 
double textord_spline_outlier_fraction = 0.1
 
double textord_skew_ile = 0.5
 
double textord_skew_lag = 0.02
 
double textord_linespace_iqrlimit = 0.2
 
double textord_width_limit = 8
 
double textord_chop_width = 1.5
 
double textord_expansion_factor = 1.0
 
double textord_overlap_x = 0.375
 
double textord_minxh = 0.25
 
double textord_min_linesize = 1.25
 
double textord_excess_blobsize = 1.3
 
double textord_occupancy_threshold = 0.4
 
double textord_underline_width = 2.0
 
double textord_min_blob_height_fraction = 0.75
 
double textord_xheight_mode_fraction = 0.4
 
double textord_ascheight_mode_fraction = 0.08
 
double textord_descheight_mode_fraction = 0.08
 
double textord_ascx_ratio_min = 1.25
 
double textord_ascx_ratio_max = 1.8
 
double textord_descx_ratio_min = 0.25
 
double textord_descx_ratio_max = 0.6
 
double textord_xheight_error_margin = 0.1
 
int textord_lms_line_trials = 12
 
bool textord_new_initial_xheight = TRUE
 
bool textord_debug_blob = FALSE
 
const int kMinLeaderCount = 5
 

compute_page_skew

Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row.

const double kNoiseSize = 0.5
 
const int kMinSize = 8
 
void compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
 
void vigorous_noise_removal (TO_BLOCK *block)
 
void cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 
void delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
 

Macro Definition Documentation

◆ MAX_HEIGHT_MODES

#define MAX_HEIGHT_MODES   12

Definition at line 105 of file makerow.cpp.

Function Documentation

◆ adjust_row_limits()

void adjust_row_limits ( TO_BLOCK *  block)

adjust_row_limits

Change the limits of rows to suit the default fractions.

Definition at line 1120 of file makerow.cpp.

1122  {
1123  TO_ROW *row; //current row
1124  float size; //size of row
1125  float ymax; //top of row
1126  float ymin; //bottom of row
1127  TO_ROW_IT row_it = block->get_rows ();
1128 
1130  tprintf("Adjusting row limits for block(%d,%d)\n",
1131  block->block->bounding_box().left(),
1132  block->block->bounding_box().top());
1133  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1134  row = row_it.data ();
1135  size = row->max_y () - row->min_y ();
1137  tprintf("Row at %f has min %f, max %f, size %f\n",
1138  row->intercept(), row->min_y(), row->max_y(), size);
1142  ymax = size * (tesseract::CCStruct::kXHeightFraction +
1145  row->set_limits (row->intercept () + ymin, row->intercept () + ymax);
1146  row->merged = FALSE;
1147  }
1148 }
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
float min_y() const
Definition: blobbox.h:557
float max_y() const
Definition: blobbox.h:554
BLOCK * block
Definition: blobbox.h:773
bool textord_show_expanded_rows
Definition: makerow.cpp:47
#define FALSE
Definition: capi.h:46
BOOL8 merged
Definition: blobbox.h:641
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
static const double kAscenderFraction
Definition: ccstruct.h:35
#define tprintf(...)
Definition: tprintf.h:31
static const double kXHeightFraction
Definition: ccstruct.h:34
static const double kDescenderFraction
Definition: ccstruct.h:33
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
float intercept() const
Definition: blobbox.h:584

◆ assign_blobs_to_rows()

void assign_blobs_to_rows ( TO_BLOCK *  block,
float *  gradient,
int  pass,
BOOL8  reject_misses,
BOOL8  make_new_rows,
BOOL8  drawing_skew 
)

Definition at line 2296 of file makerow.cpp.

2303  {
2304  OVERLAP_STATE overlap_result; //what to do with it
2305  float ycoord; //current y
2306  float top, bottom; //of blob
2307  float g_length = 1.0f; //from gradient
2308  inT16 row_count; //no of rows
2309  inT16 left_x; //left edge
2310  inT16 last_x; //previous edge
2311  float block_skew; //y delta
2312  float smooth_factor; //for new coords
2313  float near_dist; //dist to nearest row
2314  ICOORD testpt; //testing only
2315  BLOBNBOX *blob; //current blob
2316  TO_ROW *row; //current row
2317  TO_ROW *dest_row = NULL; //row to put blob in
2318  //iterators
2319  BLOBNBOX_IT blob_it = &block->blobs;
2320  TO_ROW_IT row_it = block->get_rows ();
2321 
2322  ycoord =
2323  (block->block->bounding_box ().bottom () +
2324  block->block->bounding_box ().top ()) / 2.0f;
2325  if (gradient != NULL)
2326  g_length = sqrt (1 + *gradient * *gradient);
2327 #ifndef GRAPHICS_DISABLED
2328  if (drawing_skew)
2329  to_win->SetCursor(block->block->bounding_box ().left (), ycoord);
2330 #endif
2331  testpt = ICOORD (textord_test_x, textord_test_y);
2332  blob_it.sort (blob_x_order);
2333  smooth_factor = 1.0;
2334  block_skew = 0.0f;
2335  row_count = row_it.length (); //might have rows
2336  if (!blob_it.empty ()) {
2337  left_x = blob_it.data ()->bounding_box ().left ();
2338  }
2339  else {
2340  left_x = block->block->bounding_box ().left ();
2341  }
2342  last_x = left_x;
2343  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list (); blob_it.forward ()) {
2344  blob = blob_it.data ();
2345  if (gradient != NULL) {
2346  block_skew = (1 - 1 / g_length) * blob->bounding_box ().bottom ()
2347  + *gradient / g_length * blob->bounding_box ().left ();
2348  }
2349  else if (blob->bounding_box ().left () - last_x > block->line_size / 2
2350  && last_x - left_x > block->line_size * 2
2352  // tprintf("Interpolating skew from %g",block_skew);
2353  block_skew *= (float) (blob->bounding_box ().left () - left_x)
2354  / (last_x - left_x);
2355  // tprintf("to %g\n",block_skew);
2356  }
2357  last_x = blob->bounding_box ().left ();
2358  top = blob->bounding_box ().top () - block_skew;
2359  bottom = blob->bounding_box ().bottom () - block_skew;
2360 #ifndef GRAPHICS_DISABLED
2361  if (drawing_skew)
2362  to_win->DrawTo(blob->bounding_box ().left (), ycoord + block_skew);
2363 #endif
2364  if (!row_it.empty ()) {
2365  for (row_it.move_to_first ();
2366  !row_it.at_last () && row_it.data ()->min_y () > top;
2367  row_it.forward ());
2368  row = row_it.data ();
2369  if (row->min_y () <= top && row->max_y () >= bottom) {
2370  //any overlap
2371  dest_row = row;
2372  overlap_result = most_overlapping_row (&row_it, dest_row,
2373  top, bottom,
2374  block->line_size,
2375  blob->bounding_box ().
2376  contains (testpt));
2377  if (overlap_result == NEW_ROW && !reject_misses)
2378  overlap_result = ASSIGN;
2379  }
2380  else {
2381  overlap_result = NEW_ROW;
2382  if (!make_new_rows) {
2383  near_dist = row_it.data_relative (-1)->min_y () - top;
2384  //below bottom
2385  if (bottom < row->min_y ()) {
2386  if (row->min_y () - bottom <=
2387  (block->line_spacing -
2389  //done it
2390  overlap_result = ASSIGN;
2391  dest_row = row;
2392  }
2393  }
2394  else if (near_dist > 0
2395  && near_dist < bottom - row->max_y ()) {
2396  row_it.backward ();
2397  dest_row = row_it.data ();
2398  if (dest_row->min_y () - bottom <=
2399  (block->line_spacing -
2401  //done it
2402  overlap_result = ASSIGN;
2403  }
2404  }
2405  else {
2406  if (top - row->max_y () <=
2407  (block->line_spacing -
2408  block->line_size) * (textord_overlap_x +
2410  //done it
2411  overlap_result = ASSIGN;
2412  dest_row = row;
2413  }
2414  }
2415  }
2416  }
2417  if (overlap_result == ASSIGN)
2418  dest_row->add_blob (blob_it.extract (), top, bottom,
2419  block->line_size);
2420  if (overlap_result == NEW_ROW) {
2421  if (make_new_rows && top - bottom < block->max_blob_size) {
2422  dest_row =
2423  new TO_ROW (blob_it.extract (), top, bottom,
2424  block->line_size);
2425  row_count++;
2426  if (bottom > row_it.data ()->min_y ())
2427  row_it.add_before_then_move (dest_row);
2428  //insert in right place
2429  else
2430  row_it.add_after_then_move (dest_row);
2431  smooth_factor =
2432  1.0 / (row_count * textord_skew_lag +
2434  }
2435  else
2436  overlap_result = REJECT;
2437  }
2438  }
2439  else if (make_new_rows && top - bottom < block->max_blob_size) {
2440  overlap_result = NEW_ROW;
2441  dest_row =
2442  new TO_ROW(blob_it.extract(), top, bottom, block->line_size);
2443  row_count++;
2444  row_it.add_after_then_move(dest_row);
2445  smooth_factor = 1.0 / (row_count * textord_skew_lag +
2447  }
2448  else
2449  overlap_result = REJECT;
2450  if (blob->bounding_box ().contains(testpt) && textord_debug_blob) {
2451  if (overlap_result != REJECT) {
2452  tprintf("Test blob assigned to row at (%g,%g) on pass %d\n",
2453  dest_row->min_y(), dest_row->max_y(), pass);
2454  }
2455  else {
2456  tprintf("Test blob assigned to no row on pass %d\n", pass);
2457  }
2458  }
2459  if (overlap_result != REJECT) {
2460  while (!row_it.at_first() &&
2461  row_it.data()->min_y() > row_it.data_relative(-1)->min_y()) {
2462  row = row_it.extract();
2463  row_it.backward();
2464  row_it.add_before_then_move(row);
2465  }
2466  while (!row_it.at_last() &&
2467  row_it.data ()->min_y() < row_it.data_relative (1)->min_y()) {
2468  row = row_it.extract();
2469  row_it.forward();
2470  // Keep rows in order.
2471  row_it.add_after_then_move(row);
2472  }
2473  BLOBNBOX_IT added_blob_it(dest_row->blob_list());
2474  added_blob_it.move_to_last();
2475  TBOX prev_box = added_blob_it.data_relative(-1)->bounding_box();
2476  if (dest_row->blob_list()->singleton() ||
2477  !prev_box.major_x_overlap(blob->bounding_box())) {
2478  block_skew = (1 - smooth_factor) * block_skew
2479  + smooth_factor * (blob->bounding_box().bottom() -
2480  dest_row->initial_min_y());
2481  }
2482  }
2483  }
2484  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
2485  if (row_it.data()->blob_list()->empty())
2486  delete row_it.extract(); // Discard empty rows.
2487  }
2488 }
void DrawTo(int x, int y)
Definition: scrollview.cpp:531
const TBOX & bounding_box() const
Definition: blobbox.h:215
short inT16
Definition: host.h:33
integer coordinate
Definition: points.h:30
int textord_test_x
Definition: makerow.cpp:62
float min_y() const
Definition: blobbox.h:557
bool textord_debug_blob
Definition: makerow.cpp:103
void SetCursor(int x, int y)
Definition: scrollview.cpp:525
float line_spacing
Definition: blobbox.h:775
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
float max_y() const
Definition: blobbox.h:554
double textord_overlap_x
Definition: makerow.cpp:81
BLOCK * block
Definition: blobbox.h:773
int textord_skewsmooth_offset
Definition: makerow.cpp:60
float line_size
Definition: blobbox.h:781
void add_blob(BLOBNBOX *blob, float top, float bottom, float row_size)
Definition: blobbox.cpp:728
OVERLAP_STATE
Definition: makerow.h:29
int textord_test_y
Definition: makerow.cpp:63
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2591
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
inT16 bottom() const
Definition: rect.h:61
bool contains(const FCOORD pt) const
Definition: rect.h:323
Definition: makerow.h:32
int textord_skewsmooth_offset2
Definition: makerow.cpp:61
inT16 left() const
Definition: rect.h:68
bool textord_interpolating_skew
Definition: makerow.cpp:59
double textord_skew_lag
Definition: makerow.cpp:75
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
static const double kAscenderFraction
Definition: ccstruct.h:35
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob)
Definition: makerow.cpp:2496
float initial_min_y() const
Definition: blobbox.h:563
Definition: rect.h:30
static const double kDescenderFraction
Definition: ccstruct.h:33
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
Definition: makerow.h:31
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59

◆ blob_x_order()

int blob_x_order ( const void *  item1,
const void *  item2 
)

Definition at line 2591 of file makerow.cpp.

2593  {
2594  //converted ptr
2595  BLOBNBOX *blob1 = *(BLOBNBOX **) item1;
2596  //converted ptr
2597  BLOBNBOX *blob2 = *(BLOBNBOX **) item2;
2598 
2599  if (blob1->bounding_box ().left () < blob2->bounding_box ().left ())
2600  return -1;
2601  else if (blob1->bounding_box ().left () > blob2->bounding_box ().left ())
2602  return 1;
2603  else
2604  return 0;
2605 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
inT16 left() const
Definition: rect.h:68

◆ cleanup_rows_making()

void cleanup_rows_making ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

cleanup_rows_making

Remove overlapping rows and fit all the blobs to what's left.

Definition at line 524 of file makerow.cpp.

531  {
532  //iterators
533  BLOBNBOX_IT blob_it = &block->blobs;
534  TO_ROW_IT row_it = block->get_rows ();
535 
536 #ifndef GRAPHICS_DISABLED
537  if (textord_show_parallel_rows && testing_on) {
538  if (to_win == NULL)
539  create_to_win(page_tr);
540  }
541 #endif
542  //get row coords
543  fit_parallel_rows(block,
544  gradient,
545  rotation,
546  block_edge,
547  textord_show_parallel_rows &&testing_on);
549  gradient,
550  rotation,
551  block_edge,
552  textord_show_parallel_rows &&testing_on);
553  expand_rows(page_tr, block, gradient, rotation, block_edge, testing_on);
554  blob_it.set_to_list (&block->blobs);
555  row_it.set_to_list (block->get_rows ());
556  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
557  blob_it.add_list_after (row_it.data ()->blob_list ());
558  //give blobs back
559  assign_blobs_to_rows (block, &gradient, 1, FALSE, FALSE, FALSE);
560  //now new rows must be genuine
561  blob_it.set_to_list (&block->blobs);
562  blob_it.add_list_after (&block->large_blobs);
563  assign_blobs_to_rows (block, &gradient, 2, TRUE, TRUE, FALSE);
564  //safe to use big ones now
565  blob_it.set_to_list (&block->blobs);
566  //throw all blobs in
567  blob_it.add_list_after (&block->noise_blobs);
568  blob_it.add_list_after (&block->small_blobs);
569  assign_blobs_to_rows (block, &gradient, 3, FALSE, FALSE, FALSE);
570 }
void delete_non_dropout_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:577
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
void expand_rows(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:962
#define TRUE
Definition: capi.h:45
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
bool textord_show_parallel_rows
Definition: makerow.cpp:46
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2296
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
#define FALSE
Definition: capi.h:46
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1948
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38

◆ compute_dropout_distances()

void compute_dropout_distances ( inT32 *  occupation,
inT32 *  thresholds,
inT32  line_count 
)

Definition at line 915 of file makerow.cpp.

919  {
920  inT32 line_index; //of thresholds line
921  inT32 distance; //from prev dropout
922  inT32 next_dist; //to next dropout
923  inT32 back_index; //for back filling
924  inT32 prev_threshold; //before overwrite
925 
926  distance = -line_count;
927  line_index = 0;
928  do {
929  do {
930  distance--;
931  prev_threshold = thresholds[line_index];
932  //distance from prev
933  thresholds[line_index] = distance;
934  line_index++;
935  }
936  while (line_index < line_count
937  && (occupation[line_index] < thresholds[line_index]
938  || occupation[line_index - 1] >= prev_threshold));
939  if (line_index < line_count) {
940  back_index = line_index - 1;
941  next_dist = 1;
942  while (next_dist < -distance && back_index >= 0) {
943  thresholds[back_index] = next_dist;
944  back_index--;
945  next_dist++;
946  distance++;
947  }
948  distance = 1;
949  }
950  }
951  while (line_index < line_count);
952 }
int inT32
Definition: host.h:35

◆ compute_height_modes()

inT32 compute_height_modes ( STATS *  heights,
inT32  min_height,
inT32  max_height,
inT32 *  modes,
inT32  maxmodes 
)

Definition at line 1640 of file makerow.cpp.

1644  { // size of modes
1645  inT32 pile_count; // no in source pile
1646  inT32 src_count; // no of source entries
1647  inT32 src_index; // current entry
1648  inT32 least_count; // height of smallest
1649  inT32 least_index; // index of least
1650  inT32 dest_count; // index in modes
1651 
1652  src_count = max_height + 1 - min_height;
1653  dest_count = 0;
1654  least_count = MAX_INT32;
1655  least_index = -1;
1656  for (src_index = 0; src_index < src_count; src_index++) {
1657  pile_count = heights->pile_count(min_height + src_index);
1658  if (pile_count > 0) {
1659  if (dest_count < maxmodes) {
1660  if (pile_count < least_count) {
1661  // find smallest in array
1662  least_count = pile_count;
1663  least_index = dest_count;
1664  }
1665  modes[dest_count++] = min_height + src_index;
1666  } else if (pile_count >= least_count) {
1667  while (least_index < maxmodes - 1) {
1668  modes[least_index] = modes[least_index + 1];
1669  // shuffle up
1670  least_index++;
1671  }
1672  // new one on end
1673  modes[maxmodes - 1] = min_height + src_index;
1674  if (pile_count == least_count) {
1675  // new smallest
1676  least_index = maxmodes - 1;
1677  } else {
1678  least_count = heights->pile_count(modes[0]);
1679  least_index = 0;
1680  for (dest_count = 1; dest_count < maxmodes; dest_count++) {
1681  pile_count = heights->pile_count(modes[dest_count]);
1682  if (pile_count < least_count) {
1683  // find smallest
1684  least_count = pile_count;
1685  least_index = dest_count;
1686  }
1687  }
1688  }
1689  }
1690  }
1691  }
1692  return dest_count;
1693 }
int inT32
Definition: host.h:35
#define MAX_INT32
Definition: host.h:53
inT32 pile_count(inT32 value) const
Definition: statistc.h:78

◆ compute_line_occupation()

void compute_line_occupation ( TO_BLOCK *  block,
float  gradient,
inT32  min_y,
inT32  max_y,
inT32 *  occupation,
inT32 *  deltas 
)

Definition at line 781 of file makerow.cpp.

788  {
789  inT32 line_count; //maxy-miny+1
790  inT32 line_index; //of scan line
791  int index; //array index for daft compilers
792  TO_ROW *row; //current row
793  TO_ROW_IT row_it = block->get_rows ();
794  BLOBNBOX *blob; //current blob
795  BLOBNBOX_IT blob_it; //iterator
796  float length; //of skew vector
797  TBOX blob_box; //bounding box
798  FCOORD rotation; //inverse of skew
799 
800  line_count = max_y - min_y + 1;
801  length = sqrt (gradient * gradient + 1);
802  rotation = FCOORD (1 / length, -gradient / length);
803  for (line_index = 0; line_index < line_count; line_index++)
804  deltas[line_index] = 0;
805  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
806  row = row_it.data ();
807  blob_it.set_to_list (row->blob_list ());
808  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
809  blob_it.forward ()) {
810  blob = blob_it.data ();
811  blob_box = blob->bounding_box ();
812  blob_box.rotate (rotation);//de-skew it
813  int32_t width = blob_box.right() - blob_box.left();
814  index = blob_box.bottom() - min_y;
815  ASSERT_HOST(index >= 0 && index < line_count);
816  //count transitions
817  deltas[index] += width;
818  index = blob_box.top() - min_y;
819  ASSERT_HOST(index >= 0 && index < line_count);
820  deltas[index] -= width;
821  }
822  }
823  occupation[0] = deltas[0];
824  for (line_index = 1; line_index < line_count; line_index++)
825  occupation[line_index] = occupation[line_index - 1] + deltas[line_index];
826 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
void rotate(const FCOORD &vec)
Definition: rect.h:189
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
int inT32
Definition: host.h:35
Definition: points.h:189
inT16 top() const
Definition: rect.h:54
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
#define ASSERT_HOST(x)
Definition: errcode.h:84

◆ compute_occupation_threshold()

void compute_occupation_threshold ( inT32  low_window,
inT32  high_window,
inT32  line_count,
inT32 *  occupation,
inT32 *  thresholds 
)

compute_occupation_threshold

Compute thresholds for textline or not for the occupation array.

Definition at line 834 of file makerow.cpp.

840  {
841  inT32 line_index; //of thresholds line
842  inT32 low_index; //in occupation
843  inT32 high_index; //in occupation
844  inT32 sum; //current average
845  inT32 divisor; //to get thresholds
846  inT32 min_index; //of min occ
847  inT32 min_occ; //min in locality
848  inT32 test_index; //for finding min
849 
850  divisor =
851  (inT32) ceil ((low_window + high_window) / textord_occupancy_threshold);
852  if (low_window + high_window < line_count) {
853  for (sum = 0, high_index = 0; high_index < low_window; high_index++)
854  sum += occupation[high_index];
855  for (low_index = 0; low_index < high_window; low_index++, high_index++)
856  sum += occupation[high_index];
857  min_occ = occupation[0];
858  min_index = 0;
859  for (test_index = 1; test_index < high_index; test_index++) {
860  if (occupation[test_index] <= min_occ) {
861  min_occ = occupation[test_index];
862  min_index = test_index; //find min in region
863  }
864  }
865  for (line_index = 0; line_index < low_window; line_index++)
866  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
867  //same out to end
868  for (low_index = 0; high_index < line_count; low_index++, high_index++) {
869  sum -= occupation[low_index];
870  sum += occupation[high_index];
871  if (occupation[high_index] <= min_occ) {
872  //find min in region
873  min_occ = occupation[high_index];
874  min_index = high_index;
875  }
876  //lost min from region
877  if (min_index <= low_index) {
878  min_occ = occupation[low_index + 1];
879  min_index = low_index + 1;
880  for (test_index = low_index + 2; test_index <= high_index;
881  test_index++) {
882  if (occupation[test_index] <= min_occ) {
883  min_occ = occupation[test_index];
884  //find min in region
885  min_index = test_index;
886  }
887  }
888  }
889  thresholds[line_index++] = (sum - min_occ) / divisor + min_occ;
890  }
891  }
892  else {
893  min_occ = occupation[0];
894  min_index = 0;
895  for (sum = 0, low_index = 0; low_index < line_count; low_index++) {
896  if (occupation[low_index] < min_occ) {
897  min_occ = occupation[low_index];
898  min_index = low_index;
899  }
900  sum += occupation[low_index];
901  }
902  line_index = 0;
903  }
904  for (; line_index < line_count; line_index++)
905  thresholds[line_index] = (sum - min_occ) / divisor + min_occ;
906  //same out to end
907 }
double textord_occupancy_threshold
Definition: makerow.cpp:86
int inT32
Definition: host.h:35

◆ compute_page_skew()

void compute_page_skew ( TO_BLOCK_LIST *  blocks,
float &  page_m,
float &  page_err 
)

Definition at line 287 of file makerow.cpp.

291  {
292  inT32 row_count; //total rows
293  inT32 blob_count; //total_blobs
294  inT32 row_err; //integer error
295  float *gradients; //of rows
296  float *errors; //of rows
297  inT32 row_index; //of total
298  TO_ROW *row; //current row
299  TO_BLOCK_IT block_it = blocks; //iterator
300  TO_ROW_IT row_it;
301 
302  row_count = 0;
303  blob_count = 0;
304  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
305  block_it.forward ()) {
306  POLY_BLOCK* pb = block_it.data()->block->poly_block();
307  if (pb != NULL && !pb->IsText())
308  continue; // Pretend non-text blocks don't exist.
309  row_count += block_it.data ()->get_rows ()->length ();
310  //count up rows
311  row_it.set_to_list (block_it.data ()->get_rows ());
312  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
313  blob_count += row_it.data ()->blob_list ()->length ();
314  }
315  if (row_count == 0) {
316  page_m = 0.0f;
317  page_err = 0.0f;
318  return;
319  }
320  gradients = (float *) alloc_mem (blob_count * sizeof (float));
321  //get mem
322  errors = (float *) alloc_mem (blob_count * sizeof (float));
323  if (gradients == NULL || errors == NULL)
324  MEMORY_OUT.error ("compute_page_skew", ABORT, NULL);
325 
326  row_index = 0;
327  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
328  block_it.forward ()) {
329  POLY_BLOCK* pb = block_it.data()->block->poly_block();
330  if (pb != NULL && !pb->IsText())
331  continue; // Pretend non-text blocks don't exist.
332  row_it.set_to_list (block_it.data ()->get_rows ());
333  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
334  row = row_it.data ();
335  blob_count = row->blob_list ()->length ();
336  row_err = (inT32) ceil (row->line_error ());
337  if (row_err <= 0)
338  row_err = 1;
340  blob_count /= row_err;
341  for (blob_count /= row_err; blob_count > 0; blob_count--) {
342  gradients[row_index] = row->line_m ();
343  errors[row_index] = row->line_error ();
344  row_index++;
345  }
346  }
347  else if (blob_count >= textord_min_blobs_in_row) {
348  //get gradient
349  gradients[row_index] = row->line_m ();
350  errors[row_index] = row->line_error ();
351  row_index++;
352  }
353  }
354  }
355  if (row_index == 0) {
356  //desperate
357  for (block_it.mark_cycle_pt (); !block_it.cycled_list ();
358  block_it.forward ()) {
359  POLY_BLOCK* pb = block_it.data()->block->poly_block();
360  if (pb != NULL && !pb->IsText())
361  continue; // Pretend non-text blocks don't exist.
362  row_it.set_to_list (block_it.data ()->get_rows ());
363  for (row_it.mark_cycle_pt (); !row_it.cycled_list ();
364  row_it.forward ()) {
365  row = row_it.data ();
366  gradients[row_index] = row->line_m ();
367  errors[row_index] = row->line_error ();
368  row_index++;
369  }
370  }
371  }
372  row_count = row_index;
373  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
374  gradients, row_count);
375  page_m = gradients[row_index];
376  row_index = choose_nth_item ((inT32) (row_count * textord_skew_ile),
377  errors, row_count);
378  page_err = errors[row_index];
379  free_mem(gradients);
380  free_mem(errors);
381 }
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
double textord_skew_ile
Definition: makerow.cpp:74
float line_error() const
Definition: blobbox.h:572
int textord_min_blobs_in_row
Definition: makerow.cpp:64
Definition: errcode.h:30
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:638
float line_m() const
Definition: blobbox.h:566
bool textord_biased_skewcalc
Definition: makerow.cpp:58
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
int inT32
Definition: host.h:35
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
bool IsText() const
Definition: polyblk.h:52
void free_mem(void *oldchunk)
Definition: memry.cpp:55

◆ compute_row_descdrop()

inT32 compute_row_descdrop ( TO_ROW *  row,
float  gradient,
int  xheight_blob_count,
STATS *  asc_heights 
)

Definition at line 1580 of file makerow.cpp.

1581  {
1582  // Count how many potential ascenders are in this row.
1583  int i_min = asc_heights->min_bucket();
1584  if ((i_min / row->xheight) < textord_ascx_ratio_min) {
1585  i_min = static_cast<int>(
1586  floor(row->xheight * textord_ascx_ratio_min + 0.5));
1587  }
1588  int i_max = asc_heights->max_bucket();
1589  if ((i_max / row->xheight) > textord_ascx_ratio_max) {
1590  i_max = static_cast<int>(floor(row->xheight * textord_ascx_ratio_max));
1591  }
1592  int num_potential_asc = 0;
1593  for (int i = i_min; i <= i_max; ++i) {
1594  num_potential_asc += asc_heights->pile_count(i);
1595  }
1596  inT32 min_height =
1597  static_cast<inT32>(floor(row->xheight * textord_descx_ratio_min + 0.5));
1598  inT32 max_height =
1599  static_cast<inT32>(floor(row->xheight * textord_descx_ratio_max));
1600  float xcentre; // centre of blob
1601  float height; // height of blob
1602  BLOBNBOX_IT blob_it = row->blob_list();
1603  BLOBNBOX *blob; // current blob
1604  STATS heights (min_height, max_height + 1);
1605  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1606  blob = blob_it.data();
1607  if (!blob->joined_to_prev()) {
1608  xcentre = (blob->bounding_box().left() +
1609  blob->bounding_box().right()) / 2.0f;
1610  height = (gradient * xcentre + row->parallel_c() -
1611  blob->bounding_box().bottom());
1612  if (height >= min_height && height <= max_height)
1613  heights.add(static_cast<int>(floor(height + 0.5)), 1);
1614  }
1615  }
1616  int blob_index = heights.mode(); // find mode
1617  int blob_count = heights.pile_count(blob_index); // get count of mode
1618  float total_fraction =
1620  if (static_cast<float>(blob_count + num_potential_asc) <
1621  xheight_blob_count * total_fraction) {
1622  blob_count = 0;
1623  }
1624  int descdrop = blob_count > 0 ? -blob_index : 0;
1625  if (textord_debug_xheights) {
1626  tprintf("Descdrop: %d (potential ascenders %d, descenders %d)\n",
1627  descdrop, num_potential_asc, blob_count);
1628  heights.print();
1629  }
1630  return descdrop;
1631 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool textord_debug_xheights
Definition: makerow.cpp:57
inT32 min_bucket() const
Definition: statistc.cpp:206
inT32 max_bucket() const
Definition: statistc.cpp:221
double textord_ascx_ratio_min
Definition: makerow.cpp:96
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
bool joined_to_prev() const
Definition: blobbox.h:241
double textord_descx_ratio_min
Definition: makerow.cpp:98
inT16 left() const
Definition: rect.h:68
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
int inT32
Definition: host.h:35
double textord_descheight_mode_fraction
Definition: makerow.cpp:95
#define tprintf(...)
Definition: tprintf.h:31
float xheight
Definition: blobbox.h:653
double textord_ascx_ratio_max
Definition: makerow.cpp:97
inT32 pile_count(inT32 value) const
Definition: statistc.h:78
float parallel_c() const
Definition: blobbox.h:575
double textord_descx_ratio_max
Definition: makerow.cpp:99
inT16 right() const
Definition: rect.h:75
Definition: statistc.h:33

◆ compute_row_stats()

void compute_row_stats ( TO_BLOCK *  block,
BOOL8  testing_on 
)

Definition at line 1156 of file makerow.cpp.

1159  {
1160  inT32 row_index; //of median
1161  TO_ROW *row; //current row
1162  TO_ROW *prev_row; //previous row
1163  float iqr; //inter quartile range
1164  TO_ROW_IT row_it = block->get_rows ();
1165  //number of rows
1166  inT16 rowcount = row_it.length ();
1167  TO_ROW **rows; //for choose nth
1168 
1169  rows = (TO_ROW **) alloc_mem (rowcount * sizeof (TO_ROW *));
1170  if (rows == NULL)
1171  MEMORY_OUT.error ("compute_row_stats", ABORT, NULL);
1172  rowcount = 0;
1173  prev_row = NULL;
1174  row_it.move_to_last (); //start at bottom
1175  do {
1176  row = row_it.data ();
1177  if (prev_row != NULL) {
1178  rows[rowcount++] = prev_row;
1179  prev_row->spacing = row->intercept () - prev_row->intercept ();
1180  if (testing_on)
1181  tprintf ("Row at %g yields spacing of %g\n",
1182  row->intercept (), prev_row->spacing);
1183  }
1184  prev_row = row;
1185  row_it.backward ();
1186  }
1187  while (!row_it.at_last ());
1188  block->key_row = prev_row;
1189  block->baseline_offset =
1190  fmod (prev_row->parallel_c (), block->line_spacing);
1191  if (testing_on)
1192  tprintf ("Blob based spacing=(%g,%g), offset=%g",
1193  block->line_size, block->line_spacing, block->baseline_offset);
1194  if (rowcount > 0) {
1195  row_index = choose_nth_item (rowcount * 3 / 4, rows, rowcount,
1196  sizeof (TO_ROW *), row_spacing_order);
1197  iqr = rows[row_index]->spacing;
1198  row_index = choose_nth_item (rowcount / 4, rows, rowcount,
1199  sizeof (TO_ROW *), row_spacing_order);
1200  iqr -= rows[row_index]->spacing;
1201  row_index = choose_nth_item (rowcount / 2, rows, rowcount,
1202  sizeof (TO_ROW *), row_spacing_order);
1203  block->key_row = rows[row_index];
1204  if (testing_on)
1205  tprintf (" row based=%g(%g)", rows[row_index]->spacing, iqr);
1206  if (rowcount > 2
1207  && iqr < rows[row_index]->spacing * textord_linespace_iqrlimit) {
1209  if (rows[row_index]->spacing < block->line_spacing
1210  && rows[row_index]->spacing > block->line_size)
1211  //within range
1212  block->line_size = rows[row_index]->spacing;
1213  //spacing=size
1214  else if (rows[row_index]->spacing > block->line_spacing)
1215  block->line_size = block->line_spacing;
1216  //too big so use max
1217  }
1218  else {
1219  if (rows[row_index]->spacing < block->line_spacing)
1220  block->line_size = rows[row_index]->spacing;
1221  else
1222  block->line_size = block->line_spacing;
1223  //too big so use max
1224  }
1225  if (block->line_size < textord_min_xheight)
1226  block->line_size = (float) textord_min_xheight;
1227  block->line_spacing = rows[row_index]->spacing;
1228  block->max_blob_size =
1230  }
1231  block->baseline_offset = fmod (rows[row_index]->intercept (),
1232  block->line_spacing);
1233  }
1234  if (testing_on)
1235  tprintf ("\nEstimate line size=%g, spacing=%g, offset=%g\n",
1236  block->line_size, block->line_spacing, block->baseline_offset);
1237  free_mem(rows);
1238 }
bool textord_new_initial_xheight
Definition: makerow.cpp:102
float spacing
Definition: blobbox.h:652
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
short inT16
Definition: host.h:33
int textord_min_xheight
Definition: makerow.cpp:69
float line_spacing
Definition: blobbox.h:775
double textord_linespace_iqrlimit
Definition: makerow.cpp:76
int row_spacing_order(const void *item1, const void *item2)
Definition: makerow.cpp:2635
float line_size
Definition: blobbox.h:781
Definition: errcode.h:30
float baseline_offset
Definition: blobbox.h:783
double textord_excess_blobsize
Definition: makerow.cpp:85
inT32 choose_nth_item(inT32 index, float *array, inT32 count)
Definition: statistc.cpp:638
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
TO_ROW * key_row
Definition: blobbox.h:794
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
int inT32
Definition: host.h:35
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
#define tprintf(...)
Definition: tprintf.h:31
float parallel_c() const
Definition: blobbox.h:575
void free_mem(void *oldchunk)
Definition: memry.cpp:55
float max_blob_size
Definition: blobbox.h:782
float intercept() const
Definition: blobbox.h:584

◆ compute_xheight_from_modes()

int compute_xheight_from_modes ( STATS *  heights,
STATS *  floating_heights,
bool  cap_only,
int  min_height,
int  max_height,
float *  xheight,
float *  ascrise 
)

Definition at line 1484 of file makerow.cpp.

1486  {
1487  int blob_index = heights->mode(); // find mode
1488  int blob_count = heights->pile_count(blob_index); // get count of mode
1489  if (textord_debug_xheights) {
1490  tprintf("min_height=%d, max_height=%d, mode=%d, count=%d, total=%d\n",
1491  min_height, max_height, blob_index, blob_count,
1492  heights->get_total());
1493  heights->print();
1494  floating_heights->print();
1495  }
1496  if (blob_count == 0) return 0;
1497  int modes[MAX_HEIGHT_MODES]; // biggest piles
1498  bool in_best_pile = FALSE;
1499  int prev_size = -MAX_INT32;
1500  int best_count = 0;
1501  int mode_count = compute_height_modes(heights, min_height, max_height,
1502  modes, MAX_HEIGHT_MODES);
1503  if (cap_only && mode_count > 1)
1504  mode_count = 1;
1505  int x;
1506  if (textord_debug_xheights) {
1507  tprintf("found %d modes: ", mode_count);
1508  for (x = 0; x < mode_count; x++) tprintf("%d ", modes[x]);
1509  tprintf("\n");
1510  }
1511 
1512  for (x = 0; x < mode_count - 1; x++) {
1513  if (modes[x] != prev_size + 1)
1514  in_best_pile = FALSE; // had empty height
1515  int modes_x_count = heights->pile_count(modes[x]) -
1516  floating_heights->pile_count(modes[x]);
1517  if ((modes_x_count >= blob_count * textord_xheight_mode_fraction) &&
1518  (in_best_pile || modes_x_count > best_count)) {
1519  for (int asc = x + 1; asc < mode_count; asc++) {
1520  float ratio =
1521  static_cast<float>(modes[asc]) / static_cast<float>(modes[x]);
1522  if (textord_ascx_ratio_min < ratio &&
1523  ratio < textord_ascx_ratio_max &&
1524  (heights->pile_count(modes[asc]) >=
1525  blob_count * textord_ascheight_mode_fraction)) {
1526  if (modes_x_count > best_count) {
1527  in_best_pile = true;
1528  best_count = modes_x_count;
1529  }
1530  if (textord_debug_xheights) {
1531  tprintf("X=%d, asc=%d, count=%d, ratio=%g\n",
1532  modes[x], modes[asc]-modes[x], modes_x_count, ratio);
1533  }
1534  prev_size = modes[x];
1535  *xheight = static_cast<float>(modes[x]);
1536  *ascrise = static_cast<float>(modes[asc] - modes[x]);
1537  }
1538  }
1539  }
1540  }
1541  if (*xheight == 0) { // single mode
1542  // Remove counts of the "floating" blobs (the one whose height is too
1543  // small in relation to it's top end of the bounding box) from heights
1544  // before computing the single-mode xheight.
1545  // Restore the counts in heights after the mode is found, since
1546  // floating blobs might be useful for determining potential ascenders
1547  // in compute_row_descdrop().
1548  if (floating_heights->get_total() > 0) {
1549  for (x = min_height; x < max_height; ++x) {
1550  heights->add(x, -(floating_heights->pile_count(x)));
1551  }
1552  blob_index = heights->mode(); // find the modified mode
1553  for (x = min_height; x < max_height; ++x) {
1554  heights->add(x, floating_heights->pile_count(x));
1555  }
1556  }
1557  *xheight = static_cast<float>(blob_index);
1558  *ascrise = 0.0f;
1559  best_count = heights->pile_count(blob_index);
1561  tprintf("Single mode xheight set to %g\n", *xheight);
1562  } else if (textord_debug_xheights) {
1563  tprintf("Multi-mode xheight set to %g, asc=%g\n", *xheight, *ascrise);
1564  }
1565  return best_count;
1566 }
bool textord_debug_xheights
Definition: makerow.cpp:57
inT32 mode() const
Definition: statistc.cpp:115
inT32 compute_height_modes(STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes)
Definition: makerow.cpp:1640
#define MAX_HEIGHT_MODES
Definition: makerow.cpp:105
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
double textord_ascx_ratio_min
Definition: makerow.cpp:96
double textord_xheight_mode_fraction
Definition: makerow.cpp:91
inT32 get_total() const
Definition: statistc.h:86
void print() const
Definition: statistc.cpp:534
#define FALSE
Definition: capi.h:46
double textord_ascheight_mode_fraction
Definition: makerow.cpp:93
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_INT32
Definition: host.h:53
double textord_ascx_ratio_max
Definition: makerow.cpp:97
inT32 pile_count(inT32 value) const
Definition: statistc.h:78

◆ correct_row_xheight()

void correct_row_xheight ( TO_ROW *  row,
float  xheight,
float  ascrise,
float  descdrop 
)

Definition at line 1702 of file makerow.cpp.

1703  {
1704  ROW_CATEGORY row_category = get_row_category(row);
1705  if (textord_debug_xheights) {
1706  tprintf("correcting row xheight: row->xheight %.4f"
1707  ", row->acrise %.4f row->descdrop %.4f\n",
1708  row->xheight, row->ascrise, row->descdrop);
1709  }
1710  bool normal_xheight =
1712  bool cap_xheight =
1713  within_error_margin(row->xheight, xheight + ascrise,
1715  // Use the average xheight/ascrise for the following cases:
1716  // -- the xheight of the row could not be determined at all
1717  // -- the row has descenders (e.g. "many groups", "ISBN 12345 p.3")
1718  // and its xheight is close to either cap height or average xheight
1719  // -- the row does not have ascenders or descenders, but its xheight
1720  // is close to the average block xheight (e.g. row with "www.mmm.com")
1721  if (row_category == ROW_ASCENDERS_FOUND) {
1722  if (row->descdrop >= 0.0) {
1723  row->descdrop = row->xheight * (descdrop / xheight);
1724  }
1725  } else if (row_category == ROW_INVALID ||
1726  (row_category == ROW_DESCENDERS_FOUND &&
1727  (normal_xheight || cap_xheight)) ||
1728  (row_category == ROW_UNKNOWN && normal_xheight)) {
1729  if (textord_debug_xheights) tprintf("using average xheight\n");
1730  row->xheight = xheight;
1731  row->ascrise = ascrise;
1732  row->descdrop = descdrop;
1733  } else if (row_category == ROW_DESCENDERS_FOUND) {
1734  // Assume this is a row with mostly lowercase letters and it's xheight
1735  // is computed correctly (unfortunately there is no way to distinguish
1736  // this from the case when descenders are found, but the most common
1737  // height is capheight).
1738  if (textord_debug_xheights) tprintf("lowercase, corrected ascrise\n");
1739  row->ascrise = row->xheight * (ascrise / xheight);
1740  } else if (row_category == ROW_UNKNOWN) {
1741  // Otherwise assume this row is an all-caps or small-caps row
1742  // and adjust xheight and ascrise of the row.
1743 
1744  row->all_caps = true;
1745  if (cap_xheight) { // regular all caps
1746  if (textord_debug_xheights) tprintf("all caps\n");
1747  row->xheight = xheight;
1748  row->ascrise = ascrise;
1749  row->descdrop = descdrop;
1750  } else { // small caps or caps with an odd xheight
1751  if (textord_debug_xheights) {
1752  if (row->xheight < xheight + ascrise && row->xheight > xheight) {
1753  tprintf("small caps\n");
1754  } else {
1755  tprintf("all caps with irregular xheight\n");
1756  }
1757  }
1758  row->ascrise = row->xheight * (ascrise / (xheight + ascrise));
1759  row->xheight -= row->ascrise;
1760  row->descdrop = row->xheight * (descdrop / xheight);
1761  }
1762  }
1763  if (textord_debug_xheights) {
1764  tprintf("corrected row->xheight = %.4f, row->acrise = %.4f, row->descdrop"
1765  " = %.4f\n", row->xheight, row->ascrise, row->descdrop);
1766  }
1767 }
bool textord_debug_xheights
Definition: makerow.cpp:57
BOOL8 all_caps
Definition: blobbox.h:642
bool within_error_margin(float test, float num, float margin)
Definition: makerow.h:129
float ascrise
Definition: blobbox.h:655
#define tprintf(...)
Definition: tprintf.h:31
ROW_CATEGORY
Definition: makerow.h:36
float xheight
Definition: blobbox.h:653
ROW_CATEGORY get_row_category(const TO_ROW *row)
Definition: makerow.h:123
double textord_xheight_error_margin
Definition: makerow.cpp:100
float descdrop
Definition: blobbox.h:656

◆ delete_non_dropout_rows()

void delete_non_dropout_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

delete_non_dropout_rows

Compute the linespacing and offset.

Definition at line 577 of file makerow.cpp.

583  {
584  TBOX block_box; //deskewed block
585  inT32 *deltas; //change in occupation
586  inT32 *occupation; //of pixel coords
587  inT32 max_y; //in block
588  inT32 min_y;
589  inT32 line_index; //of scan line
590  inT32 line_count; //no of scan lines
591  inT32 distance; //to drop-out
592  inT32 xleft; //of block
593  inT32 ybottom; //of block
594  TO_ROW *row; //current row
595  TO_ROW_IT row_it = block->get_rows ();
596  BLOBNBOX_IT blob_it = &block->blobs;
597 
598  if (row_it.length () == 0)
599  return; //empty block
600  block_box = deskew_block_coords (block, gradient);
601  xleft = block->block->bounding_box ().left ();
602  ybottom = block->block->bounding_box ().bottom ();
603  min_y = block_box.bottom () - 1;
604  max_y = block_box.top () + 1;
605  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
606  line_index = (inT32) floor (row_it.data ()->intercept ());
607  if (line_index <= min_y)
608  min_y = line_index - 1;
609  if (line_index >= max_y)
610  max_y = line_index + 1;
611  }
612  line_count = max_y - min_y + 1;
613  if (line_count <= 0)
614  return; //empty block
615  deltas = (inT32 *) alloc_mem (line_count * sizeof (inT32));
616  occupation = (inT32 *) alloc_mem (line_count * sizeof (inT32));
617  if (deltas == NULL || occupation == NULL)
618  MEMORY_OUT.error ("compute_line_spacing", ABORT, NULL);
619 
620  compute_line_occupation(block, gradient, min_y, max_y, occupation, deltas);
622  ceil (block->line_spacing *
625  (inT32) ceil (block->line_spacing *
628  max_y - min_y + 1, occupation, deltas);
629 #ifndef GRAPHICS_DISABLED
630  if (testing_on) {
631  draw_occupation(xleft, ybottom, min_y, max_y, occupation, deltas);
632  }
633 #endif
634  compute_dropout_distances(occupation, deltas, line_count);
635  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
636  row = row_it.data ();
637  line_index = (inT32) floor (row->intercept ());
638  distance = deltas[line_index - min_y];
639  if (find_best_dropout_row (row, distance, block->line_spacing / 2,
640  line_index, &row_it, testing_on)) {
641 #ifndef GRAPHICS_DISABLED
642  if (testing_on)
643  plot_parallel_row(row, gradient, block_edge,
644  ScrollView::WHITE, rotation);
645 #endif
646  blob_it.add_list_after (row_it.data ()->blob_list ());
647  delete row_it.extract (); //too far away
648  }
649  }
650  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
651  blob_it.add_list_after (row_it.data ()->blob_list ());
652  }
653 
654  free_mem(deltas);
655  free_mem(occupation);
656 }
const ERRCODE MEMORY_OUT
Definition: stderr.h:25
void draw_occupation(inT32 xleft, inT32 ybottom, inT32 min_y, inT32 max_y, inT32 occupation[], inT32 thresholds[])
Definition: drawtord.cpp:166
void compute_line_occupation(TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas)
Definition: makerow.cpp:781
float line_spacing
Definition: blobbox.h:775
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
BLOCK * block
Definition: blobbox.h:773
void compute_occupation_threshold(inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds)
Definition: makerow.cpp:834
Definition: errcode.h:30
TBOX deskew_block_coords(TO_BLOCK *block, float gradient)
Definition: makerow.cpp:745
inT16 bottom() const
Definition: rect.h:61
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
int inT32
Definition: host.h:35
static const double kAscenderFraction
Definition: ccstruct.h:35
void error(const char *caller, TessErrorLogCode action, const char *format,...) const
Definition: errcode.cpp:40
BOOL8 find_best_dropout_row(TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on)
Definition: makerow.cpp:665
inT16 top() const
Definition: rect.h:54
static const double kXHeightFraction
Definition: ccstruct.h:34
Definition: rect.h:30
static const double kDescenderFraction
Definition: ccstruct.h:33
void free_mem(void *oldchunk)
Definition: memry.cpp:55
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
void compute_dropout_distances(inT32 *occupation, inT32 *thresholds, inT32 line_count)
Definition: makerow.cpp:915
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59
float intercept() const
Definition: blobbox.h:584

◆ deskew_block_coords()

TBOX deskew_block_coords ( TO_BLOCK *  block,
float  gradient 
)

Definition at line 745 of file makerow.cpp.

748  {
749  TBOX result; //block bounds
750  TBOX blob_box; //of block
751  FCOORD rotation; //deskew vector
752  float length; //of gradient vector
753  TO_ROW_IT row_it = block->get_rows ();
754  TO_ROW *row; //current row
755  BLOBNBOX *blob; //current blob
756  BLOBNBOX_IT blob_it; //iterator
757 
758  length = sqrt (gradient * gradient + 1);
759  rotation = FCOORD (1 / length, -gradient / length);
760  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
761  row = row_it.data ();
762  blob_it.set_to_list (row->blob_list ());
763  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
764  blob_it.forward ()) {
765  blob = blob_it.data ();
766  blob_box = blob->bounding_box ();
767  blob_box.rotate (rotation);//de-skew it
768  result += blob_box;
769  }
770  }
771  return result;
772 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
void rotate(const FCOORD &vec)
Definition: rect.h:189
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
Definition: points.h:189
Definition: rect.h:30

◆ expand_rows()

void expand_rows ( ICOORD  page_tr,
TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 962 of file makerow.cpp.

969  {
970  BOOL8 swallowed_row; //eaten a neighbour
971  float y_max, y_min; //new row limits
972  float y_bottom, y_top; //allowed limits
973  TO_ROW *test_row; //next row
974  TO_ROW *row; //current row
975  //iterators
976  BLOBNBOX_IT blob_it = &block->blobs;
977  TO_ROW_IT row_it = block->get_rows ();
978 
979 #ifndef GRAPHICS_DISABLED
980  if (textord_show_expanded_rows && testing_on) {
981  if (to_win == NULL)
982  create_to_win(page_tr);
983  }
984 #endif
985 
986  adjust_row_limits(block); //shift min,max.
988  if (block->get_rows ()->length () == 0)
989  return;
990  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
991  }
992  assign_blobs_to_rows (block, &gradient, 4, TRUE, FALSE, FALSE);
993  //get real membership
994  if (block->get_rows ()->length () == 0)
995  return;
996  fit_parallel_rows(block,
997  gradient,
998  rotation,
999  block_edge,
1000  textord_show_expanded_rows &&testing_on);
1002  compute_row_stats(block, textord_show_expanded_rows &&testing_on);
1003  row_it.move_to_last ();
1004  do {
1005  row = row_it.data ();
1006  y_max = row->max_y (); //get current limits
1007  y_min = row->min_y ();
1008  y_bottom = row->intercept () - block->line_size * textord_expansion_factor *
1010  y_top = row->intercept () + block->line_size * textord_expansion_factor *
1013  if (y_min > y_bottom) { //expansion allowed
1014  if (textord_show_expanded_rows && testing_on)
1015  tprintf("Expanding bottom of row at %f from %f to %f\n",
1016  row->intercept(), y_min, y_bottom);
1017  //expandable
1018  swallowed_row = TRUE;
1019  while (swallowed_row && !row_it.at_last ()) {
1020  swallowed_row = FALSE;
1021  //get next one
1022  test_row = row_it.data_relative (1);
1023  //overlaps space
1024  if (test_row->max_y () > y_bottom) {
1025  if (test_row->min_y () > y_bottom) {
1026  if (textord_show_expanded_rows && testing_on)
1027  tprintf("Eating row below at %f\n", test_row->intercept());
1028  row_it.forward ();
1029 #ifndef GRAPHICS_DISABLED
1030  if (textord_show_expanded_rows && testing_on)
1031  plot_parallel_row(test_row,
1032  gradient,
1033  block_edge,
1035  rotation);
1036 #endif
1037  blob_it.set_to_list (row->blob_list ());
1038  blob_it.add_list_after (test_row->blob_list ());
1039  //swallow complete row
1040  delete row_it.extract ();
1041  row_it.backward ();
1042  swallowed_row = TRUE;
1043  }
1044  else if (test_row->max_y () < y_min) {
1045  //shorter limit
1046  y_bottom = test_row->max_y ();
1047  if (textord_show_expanded_rows && testing_on)
1048  tprintf("Truncating limit to %f due to touching row at %f\n",
1049  y_bottom, test_row->intercept());
1050  }
1051  else {
1052  y_bottom = y_min; //can't expand it
1053  if (textord_show_expanded_rows && testing_on)
1054  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1055  y_bottom, test_row->intercept());
1056  }
1057  }
1058  }
1059  y_min = y_bottom; //expand it
1060  }
1061  if (y_max < y_top) { //expansion allowed
1062  if (textord_show_expanded_rows && testing_on)
1063  tprintf("Expanding top of row at %f from %f to %f\n",
1064  row->intercept(), y_max, y_top);
1065  swallowed_row = TRUE;
1066  while (swallowed_row && !row_it.at_first ()) {
1067  swallowed_row = FALSE;
1068  //get one above
1069  test_row = row_it.data_relative (-1);
1070  if (test_row->min_y () < y_top) {
1071  if (test_row->max_y () < y_top) {
1072  if (textord_show_expanded_rows && testing_on)
1073  tprintf("Eating row above at %f\n", test_row->intercept());
1074  row_it.backward ();
1075  blob_it.set_to_list (row->blob_list ());
1076 #ifndef GRAPHICS_DISABLED
1077  if (textord_show_expanded_rows && testing_on)
1078  plot_parallel_row(test_row,
1079  gradient,
1080  block_edge,
1082  rotation);
1083 #endif
1084  blob_it.add_list_after (test_row->blob_list ());
1085  //swallow complete row
1086  delete row_it.extract ();
1087  row_it.forward ();
1088  swallowed_row = TRUE;
1089  }
1090  else if (test_row->min_y () < y_max) {
1091  //shorter limit
1092  y_top = test_row->min_y ();
1093  if (textord_show_expanded_rows && testing_on)
1094  tprintf("Truncating limit to %f due to touching row at %f\n",
1095  y_top, test_row->intercept());
1096  }
1097  else {
1098  y_top = y_max; //can't expand it
1099  if (textord_show_expanded_rows && testing_on)
1100  tprintf("Not expanding limit beyond %f due to touching row at %f\n",
1101  y_top, test_row->intercept());
1102  }
1103  }
1104  }
1105  y_max = y_top;
1106  }
1107  //new limits
1108  row->set_limits (y_min, y_max);
1109  row_it.backward ();
1110  }
1111  while (!row_it.at_last ());
1112 }
bool textord_new_initial_xheight
Definition: makerow.cpp:102
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
#define TRUE
Definition: capi.h:45
float min_y() const
Definition: blobbox.h:557
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
float max_y() const
Definition: blobbox.h:554
float line_size
Definition: blobbox.h:781
void compute_row_stats(TO_BLOCK *block, BOOL8 testing_on)
Definition: makerow.cpp:1156
unsigned char BOOL8
Definition: host.h:46
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2296
void adjust_row_limits(TO_BLOCK *block)
Definition: makerow.cpp:1120
double textord_expansion_factor
Definition: makerow.cpp:80
bool textord_show_expanded_rows
Definition: makerow.cpp:47
#define FALSE
Definition: capi.h:46
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void fit_parallel_rows(TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:1948
static const double kAscenderFraction
Definition: ccstruct.h:35
#define tprintf(...)
Definition: tprintf.h:31
static const double kXHeightFraction
Definition: ccstruct.h:34
static const double kDescenderFraction
Definition: ccstruct.h:33
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125
float intercept() const
Definition: blobbox.h:584

◆ fill_heights()

void fill_heights ( TO_ROW *  row,
float  gradient,
int  min_height,
int  max_height,
STATS *  heights,
STATS *  floating_heights 
)

Definition at line 1423 of file makerow.cpp.

// Accumulates blob heights above the row baseline into two STATS histograms.
// Every blob that is not joined to its predecessor contributes
// floor(top + 0.5), where top is the box top minus the baseline y at the
// blob's x centre, provided top lies within [min_height, max_height].
// The same value is also added to floating_heights when the box height is
// less than textord_min_blob_height_fraction of top.
// NOTE(review): listing lines 1441 and 1459 are absent from this extract;
// presumably they are the `if` tests on textord_fix_xheight_bug and
// textord_debug_xheights (both appear in the reference list below) - confirm
// against makerow.cpp.
1424  {
1425  float xcentre; // centre of blob
1426  float top; // top y coord of blob
1427  float height; // height of blob
1428  BLOBNBOX *blob; // current blob
1429  int repeated_set;
1430  BLOBNBOX_IT blob_it = row->blob_list();
1431  if (blob_it.empty()) return; // no blobs in this row
1432  bool has_rep_chars =
1433  row->rep_chars_marked() && row->num_repeated_sets() > 0;
1434  do {
1435  blob = blob_it.data();
1436  if (!blob->joined_to_prev()) {
1437  xcentre = (blob->bounding_box().left() +
1438  blob->bounding_box().right()) / 2.0f;
1439  top = blob->bounding_box().top();
1440  height = blob->bounding_box().height();
// Two alternative baselines: the row's spline, or the straight line
// gradient * x + parallel_c(). The selecting `if` (line 1441) is missing here.
1442  top -= row->baseline.y(xcentre);
1443  else
1444  top -= gradient * xcentre + row->parallel_c();
1445  if (top >= min_height && top <= max_height) {
1446  heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1447  if (height / top < textord_min_blob_height_fraction) {
1448  floating_heights->add(static_cast<inT32>(floor(top + 0.5)), 1);
1449  }
1450  }
1451  }
1452  // Skip repeated chars, since they are likely to skew the height stats.
// The first blob of a repeated set has already been processed above; only
// the remaining blobs carrying the same set id are stepped over.
1453  if (has_rep_chars && blob->repeated_set() != 0) {
1454  repeated_set = blob->repeated_set();
1455  blob_it.forward();
1456  while (!blob_it.at_first() &&
1457  blob_it.data()->repeated_set() == repeated_set) {
1458  blob_it.forward();
1460  tprintf("Skipping repeated char when computing xheight\n");
1461  }
1462  } else {
1463  blob_it.forward();
1464  }
1465  } while (!blob_it.at_first());
1466 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
bool textord_debug_xheights
Definition: makerow.cpp:57
bool textord_fix_xheight_bug
Definition: makerow.cpp:55
int num_repeated_sets() const
Definition: blobbox.h:633
void add(inT32 value, inT32 count)
Definition: statistc.cpp:101
double y(double x) const
Definition: quspline.cpp:217
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool joined_to_prev() const
Definition: blobbox.h:241
inT16 left() const
Definition: rect.h:68
bool rep_chars_marked() const
Definition: blobbox.h:627
int repeated_set() const
Definition: blobbox.h:247
inT16 height() const
Definition: rect.h:104
QSPLINE baseline
Definition: blobbox.h:666
#define tprintf(...)
Definition: tprintf.h:31
inT16 top() const
Definition: rect.h:54
float parallel_c() const
Definition: blobbox.h:575
inT16 right() const
Definition: rect.h:75
double textord_min_blob_height_fraction
Definition: makerow.cpp:89

◆ find_best_dropout_row()

BOOL8 find_best_dropout_row ( TO_ROW *  row,
inT32  distance,
float  dist_limit,
inT32  line_index,
TO_ROW_IT *  row_it,
BOOL8  testing_on 
)

Definition at line 665 of file makerow.cpp.

// Decides whether `row` should be dropped in favour of a neighbouring row
// with better dropout characteristics. Returns TRUE when the row should be
// deleted and FALSE when it should be kept.
// A negative distance searches forward through row_it (row_inc = 1); a
// non-negative distance searches backward (row_inc = -1).
672  {
673  inT32 next_index; // of neighbouring row
674  inT32 row_offset; //from current row
675  inT32 abs_dist; //absolute distance
676  inT8 row_inc; //increment to row_index
677  TO_ROW *next_row; //neighbouring row
678 
679  if (testing_on)
680  tprintf ("Row at %g(%g), dropout dist=%d,",
681  row->intercept (), row->parallel_c (), distance);
682  if (distance < 0) {
683  row_inc = 1;
684  abs_dist = -distance;
685  }
686  else {
687  row_inc = -1;
688  abs_dist = distance;
689  }
// The dropout is further away than the caller's limit: delete regardless of
// what the neighbours look like.
690  if (abs_dist > dist_limit) {
691  if (testing_on) {
692  tprintf (" too far - deleting\n");
693  }
694  return TRUE;
695  }
// Neighbours are examined only if one exists in the search direction;
// otherwise control falls through to FALSE without printing " keeping".
696  if ((distance < 0 && !row_it->at_last ())
697  || (distance >= 0 && !row_it->at_first ())) {
698  row_offset = row_inc;
699  do {
700  next_row = row_it->data_relative (row_offset);
701  next_index = (inT32) floor (next_row->intercept ());
// A neighbour strictly between line_index and line_index + 2 * distance is
// closer to the dropout than this row is.
702  if ((distance < 0
703  && next_index < line_index
704  && next_index > line_index + distance + distance)
705  || (distance >= 0
706  && next_index > line_index
707  && next_index < line_index + distance + distance)) {
708  if (testing_on) {
709  tprintf (" nearer neighbour (%d) at %g\n",
710  line_index + distance - next_index,
711  next_row->intercept ());
712  }
713  return TRUE; //other is nearer
714  }
// The neighbour ties on position: this row is deleted unless it is strictly
// more believable than the neighbour.
715  else if (next_index == line_index
716  || next_index == line_index + distance + distance) {
717  if (row->believability () <= next_row->believability ()) {
718  if (testing_on) {
719  tprintf (" equal but more believable at %g (%g/%g)\n",
720  next_row->intercept (),
721  row->believability (),
722  next_row->believability ());
723  }
724  return TRUE; //other is more believable
725  }
726  }
727  row_offset += row_inc;
728  }
// Keep scanning only while neighbours tie on position.
// NOTE(review): row_offset becomes negative when row_inc is -1, so the
// `row_offset < row_it->length ()` test only bounds the forward search -
// confirm this is intended.
729  while ((next_index == line_index
730  || next_index == line_index + distance + distance)
731  && row_offset < row_it->length ());
732  if (testing_on)
733  tprintf (" keeping\n");
734  }
735  return FALSE;
736 }
#define TRUE
Definition: capi.h:45
float believability() const
Definition: blobbox.h:581
#define FALSE
Definition: capi.h:46
SIGNED char inT8
Definition: host.h:31
int inT32
Definition: host.h:35
#define tprintf(...)
Definition: tprintf.h:31
float parallel_c() const
Definition: blobbox.h:575
float intercept() const
Definition: blobbox.h:584

◆ fit_lms_line()

void fit_lms_line ( TO_ROW *  row)

Definition at line 267 of file makerow.cpp.

// Fits an LMS line to a row: one point per blob, taken at the integer
// midpoint of the box's left/right edges and at the box bottom, is fed to
// `lms`, and the fitted gradient, intercept and error are stored on the row
// via set_line().
// NOTE(review): listing line 269 is absent from this extract; presumably it
// declares `lms` (a line fitter from detlinefit.h) - confirm.
267  {
268  float m, c; // fitted line
270  BLOBNBOX_IT blob_it = row->blob_list();
271 
272  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
273  const TBOX& box = blob_it.data()->bounding_box();
274  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
275  }
276  double error = lms.Fit(&m, &c);
277  row->set_line(m, c, error);
278 }
integer coordinate
Definition: points.h:30
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
inT16 left() const
Definition: rect.h:68
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ fit_parallel_lms()

void fit_parallel_lms ( float  gradient,
TO_ROW *  row 
)

Definition at line 1990 of file makerow.cpp.

// Fits a line of the given gradient to the bottoms of the row's blobs
// (blobs joined to their predecessor are ignored) and stores it as the row's
// parallel line. The row's ordinary line is then set as well, either to the
// same constrained fit or to an unconstrained refit.
// NOTE(review): listing lines 1993 and 2006 are absent from this extract.
// Line 1993 presumably declares `lms`; line 2006 is presumably the `if`
// that opens the block closed at line 2008, likely testing blobcount against
// textord_lms_line_trials and/or textord_straight_baselines (both appear in
// the reference list below) - confirm against makerow.cpp.
1990  {
1991  float c; // fitted line
1992  int blobcount; // no of blobs
1994  BLOBNBOX_IT blob_it = row->blob_list();
1995 
1996  blobcount = 0;
1997  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1998  if (!blob_it.data()->joined_to_prev()) {
1999  const TBOX& box = blob_it.data()->bounding_box();
2000  lms.Add(ICOORD((box.left() + box.right()) / 2, box.bottom()));
2001  blobcount++;
2002  }
2003  }
2004  double error = lms.ConstrainedFit(gradient, &c);
2005  row->set_parallel_line(gradient, c, error);
// Unconstrained refit: overwrites the local gradient, c and error before
// they are stored by set_line() below.
2007  error = lms.Fit(&gradient, &c);
2008  }
2009  //set the other too
2010  row->set_line(gradient, c, error);
2011 }
integer coordinate
Definition: points.h:30
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
bool textord_straight_baselines
Definition: makerow.cpp:52
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
void set_parallel_line(float gradient, float new_c, float new_error)
Definition: blobbox.h:607
double ConstrainedFit(const FCOORD &direction, double min_dist, double max_dist, bool debug, ICOORD *line_pt)
Definition: detlinefit.cpp:131
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
int textord_lms_line_trials
Definition: makerow.cpp:101
inT16 left() const
Definition: rect.h:68
void set_line(float new_m, float new_c, float new_error)
Definition: blobbox.h:599
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ fit_parallel_rows()

void fit_parallel_rows ( TO_BLOCK *  block,
float  gradient,
FCOORD  rotation,
inT32  block_edge,
BOOL8  testing_on 
)

Definition at line 1948 of file makerow.cpp.

// Refits every row of the block with a line of the shared gradient.
// Rows with no blobs are removed from the list and deleted; the others are
// passed to fit_parallel_lms(). When graphics are compiled in and testing_on
// is set, each row is plotted in a colour cycling from RED to MAGENTA.
// Finally the rows are re-sorted with row_y_order.
1954  {
1955 #ifndef GRAPHICS_DISABLED
1956  ScrollView::Color colour; //of row
1957 #endif
1958  TO_ROW_IT row_it = block->get_rows ();
1959 
1960  row_it.move_to_first ();
1961  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1962  if (row_it.data ()->blob_list ()->empty ())
1963  delete row_it.extract (); //nothing in it
1964  else
1965  fit_parallel_lms (gradient, row_it.data ());
1966  }
1967 #ifndef GRAPHICS_DISABLED
1968  if (testing_on) {
1969  colour = ScrollView::RED;
1970  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1971  plot_parallel_row (row_it.data (), gradient,
1972  block_edge, colour, rotation);
// Step to the next colour, wrapping back to RED after MAGENTA.
1973  colour = (ScrollView::Color) (colour + 1);
1974  if (colour > ScrollView::MAGENTA)
1975  colour = ScrollView::RED;
1976  }
1977  }
1978 #endif
1979  row_it.sort (row_y_order); //may have gone out of order
1980 }
void fit_parallel_lms(float gradient, TO_ROW *row)
Definition: makerow.cpp:1990
int row_y_order(const void *item1, const void *item2)
Definition: makerow.cpp:2613
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void plot_parallel_row(TO_ROW *row, float gradient, inT32 left, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:125

◆ linear_spline_baseline()

double* linear_spline_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2205 of file makerow.cpp.

// Builds a piecewise-linear baseline for the row. The blobs are counted,
// divided into `segments` groups of blobs_per_segment blobs, and a line is
// fitted to the box bottoms of each group. Returns an alloc_mem'd array of
// 3 doubles per segment, stored as {0, b, c}, and fills xstarts[] with the
// segment boundaries; `segments` is overwritten with the segment count.
// The caller owns the returned array (make_baseline_spline releases it with
// free_mem).
// NOTE(review): listing line 2221 is absent from this extract; presumably it
// declares `lms` (a line fitter from detlinefit.h) - confirm.
2210  {
2211  int blobcount; //no of blobs
2212  int blobindex; //current blob
2213  int index1, index2; //blob numbers
2214  int blobs_per_segment; //blobs in each
2215  TBOX box; //blob box
2216  TBOX new_box; //new_it box
2217  //blobs
2218  BLOBNBOX_IT blob_it = row->blob_list ();
2219  BLOBNBOX_IT new_it = blob_it; //front end
2220  float b, c; //fitted curve
2222  double *coeffs; //quadratic coeffs
2223  inT32 segment; //current segment
2224 
// First pass: count the boxes by walking blob_it once round the list.
2225  box = box_next_pre_chopped (&blob_it);
2226  xstarts[0] = box.left ();
2227  blobcount = 1;
2228  while (!blob_it.at_first ()) {
2229  blobcount++;
2230  box = box_next_pre_chopped (&blob_it);
2231  }
2232  segments = blobcount / textord_spline_medianwin;
2233  if (segments < 1)
2234  segments = 1;
2235  blobs_per_segment = blobcount / segments;
2236  coeffs = (double *) alloc_mem (segments * 3 * sizeof (double));
2237  if (textord_oldbl_debug)
2238  tprintf
2239  ("Linear splining baseline of %d blobs at (%d,%d), into %d segments of %d blobs\n",
2240  blobcount, box.left (), box.bottom (), segments, blobs_per_segment);
2241  segment = 1;
// Start new_it half a segment ahead of blob_it; the two iterators then
// supply alternate segments in the loop below.
2242  for (index2 = 0; index2 < blobs_per_segment / 2; index2++)
2243  box_next_pre_chopped(&new_it);
2244  index1 = 0;
2245  blobindex = index2;
2246  do {
// Segment fitted from blob_it. The last segment keeps consuming boxes until
// blobcount is reached.
2247  blobindex += blobs_per_segment;
2248  lms.Clear();
2249  while (index1 < blobindex || (segment == segments && index1 < blobcount)) {
2250  box = box_next_pre_chopped (&blob_it);
2251  int middle = (box.left() + box.right()) / 2;
2252  lms.Add(ICOORD(middle, box.bottom()));
2253  index1++;
2254  if (index1 == blobindex - blobs_per_segment / 2
2255  || index1 == blobcount - 1) {
2256  xstarts[segment] = box.left ();
2257  }
2258  }
2259  lms.Fit(&b, &c);
// The leading coefficient is always stored as 0, followed by the fitted b
// and c.
2260  coeffs[segment * 3 - 3] = 0;
2261  coeffs[segment * 3 - 2] = b;
2262  coeffs[segment * 3 - 1] = c;
2263  segment++;
2264  if (segment > segments)
2265  break;
2266 
// Next segment fitted from new_it, using the same scheme.
2267  blobindex += blobs_per_segment;
2268  lms.Clear();
2269  while (index2 < blobindex || (segment == segments && index2 < blobcount)) {
2270  new_box = box_next_pre_chopped (&new_it);
2271  int middle = (new_box.left() + new_box.right()) / 2;
2272  lms.Add(ICOORD (middle, new_box.bottom()));
2273  index2++;
2274  if (index2 == blobindex - blobs_per_segment / 2
2275  || index2 == blobcount - 1) {
2276  xstarts[segment] = new_box.left ();
2277  }
2278  }
2279  lms.Fit(&b, &c);
2280  coeffs[segment * 3 - 3] = 0;
2281  coeffs[segment * 3 - 2] = b;
2282  coeffs[segment * 3 - 1] = c;
2283  segment++;
2284  }
2285  while (segment <= segments);
2286  return coeffs;
2287 }
integer coordinate
Definition: points.h:30
double Fit(ICOORD *pt1, ICOORD *pt2)
Definition: detlinefit.h:75
void Add(const ICOORD &pt)
Definition: detlinefit.cpp:52
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
inT16 bottom() const
Definition: rect.h:61
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
int textord_spline_medianwin
Definition: makerow.cpp:66
inT16 left() const
Definition: rect.h:68
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:39
int inT32
Definition: host.h:35
#define tprintf(...)
Definition: tprintf.h:31
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:660
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75

◆ make_baseline_spline()

void make_baseline_spline ( TO_ROW *  row,
TO_BLOCK *  block 
)

Definition at line 2073 of file makerow.cpp.

// Builds row->baseline as a QSPLINE. When the `if` below succeeds the spline
// comes from linear_spline_baseline(); otherwise it is a single segment
// whose coefficients are {0, row->line_m(), row->line_c()}.
// The temporary xstarts and coeffs arrays are freed before returning.
// NOTE(review): listing line 2082 (the rest of the `if` condition) is absent
// from this extract; presumably it tests textord_straight_baselines and/or
// textord_parallel_baselines (both appear in the reference list below) -
// confirm against makerow.cpp.
2074  {
2075  inT32 *xstarts; // spline boundaries
2076  double *coeffs; // quadratic coeffs
2077  inT32 segments; // no of segments
2078 
// One slot per blob plus one.
2079  xstarts =
2080  (inT32 *) alloc_mem((row->blob_list()->length() + 1) * sizeof(inT32));
2081  if (segment_baseline(row, block, segments, xstarts)
2083  coeffs = linear_spline_baseline(row, block, segments, xstarts);
2084  } else {
// Collapse to one segment that ends at the last boundary segment_baseline
// produced.
2085  xstarts[1] = xstarts[segments];
2086  segments = 1;
2087  coeffs = (double *) alloc_mem (3 * sizeof (double));
2088  coeffs[0] = 0;
2089  coeffs[1] = row->line_m ();
2090  coeffs[2] = row->line_c ();
2091  }
2092  row->baseline = QSPLINE (segments, xstarts, coeffs);
2093  free_mem(coeffs);
2094  free_mem(xstarts);
2095 }
bool textord_straight_baselines
Definition: makerow.cpp:52
BOOL8 segment_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2106
bool textord_parallel_baselines
Definition: makerow.cpp:51
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
float line_m() const
Definition: blobbox.h:566
float line_c() const
Definition: blobbox.h:569
double * linear_spline_baseline(TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[])
Definition: makerow.cpp:2205
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
int inT32
Definition: host.h:35
QSPLINE baseline
Definition: blobbox.h:666
void free_mem(void *oldchunk)
Definition: memry.cpp:55

◆ make_initial_textrows()

void make_initial_textrows ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 227 of file makerow.cpp.

// Arranges the good blobs of the block into rows of text: blobs are assigned
// to rows by assign_blobs_to_rows() (pass 0, no gradient supplied), and an
// LMS line is then fitted to every row. When graphics are compiled in and
// both textord_show_initial_rows and testing_on are set, the debug window is
// created if needed and each row is plotted in a colour cycling from RED to
// MAGENTA.
232  {
233  TO_ROW_IT row_it = block->get_rows ();
234 
235 #ifndef GRAPHICS_DISABLED
236  ScrollView::Color colour; //of row
237 
238  if (textord_show_initial_rows && testing_on) {
239  if (to_win == NULL)
240  create_to_win(page_tr);
241  }
242 #endif
243  //guess skew
244  assign_blobs_to_rows (block, NULL, 0, TRUE, TRUE, textord_show_initial_rows && testing_on);
245  row_it.move_to_first ();
246  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ())
247  fit_lms_line (row_it.data ());
248 #ifndef GRAPHICS_DISABLED
249  if (textord_show_initial_rows && testing_on) {
250  colour = ScrollView::RED;
251  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
252  plot_to_row (row_it.data (), colour, rotation);
// Step to the next colour, wrapping back to RED after MAGENTA.
253  colour = (ScrollView::Color) (colour + 1);
254  if (colour > ScrollView::MAGENTA)
255  colour = ScrollView::RED;
256  }
257  }
258 #endif
259 }
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
#define TRUE
Definition: capi.h:45
bool textord_show_initial_rows
Definition: makerow.cpp:45
void assign_blobs_to_rows(TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew)
Definition: makerow.cpp:2296
void plot_to_row(TO_ROW *row, ScrollView::Color colour, FCOORD rotation)
Definition: drawtord.cpp:91
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38

◆ make_rows()

float make_rows ( ICOORD  page_tr,
TO_BLOCK_LIST *  port_blocks 
)

Definition at line 201 of file makerow.cpp.

// Arranges the blobs of every block in port_blocks into rows. Initial text
// rows are made per block, the skew is then computed once over all blocks,
// and each block's rows are cleaned up using that skew. Returns the global
// skew gradient port_m.
// NOTE(review): listing lines 210 and 217 (the final argument of each call)
// are absent from this extract; presumably they pass a BOOL8 testing flag
// derived from textord_test_landscape (both appear in the reference list
// below) - confirm against makerow.cpp.
201  {
202  float port_m; // global skew
203  float port_err; // global noise
204  TO_BLOCK_IT block_it; // iterator
205 
206  block_it.set_to_list(port_blocks);
207  for (block_it.mark_cycle_pt(); !block_it.cycled_list();
208  block_it.forward())
209  make_initial_textrows(page_tr, block_it.data(), FCOORD(1.0f, 0.0f),
211  // compute globally
212  compute_page_skew(port_blocks, port_m, port_err);
213  block_it.set_to_list(port_blocks);
214  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
// The left edge of the block's bounding box is passed as block_edge.
215  cleanup_rows_making(page_tr, block_it.data(), port_m, FCOORD(1.0f, 0.0f),
216  block_it.data()->block->bounding_box().left(),
218  }
219  return port_m; // global skew
220 }
bool textord_test_landscape
Definition: makerow.cpp:50
void cleanup_rows_making(ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on)
Definition: makerow.cpp:524
unsigned char BOOL8
Definition: host.h:46
void make_initial_textrows(ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on)
Definition: makerow.cpp:227
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
Definition: points.h:189

◆ make_single_row()

float make_single_row ( ICOORD  page_tr,
bool  allow_sub_blobs,
TO_BLOCK *  block,
TO_BLOCK_LIST *  blocks 
)

Definition at line 164 of file makerow.cpp.

// Arranges all the blobs of the block into a single row. If the block holds
// exactly one blob and allow_sub_blobs is set, a second row is first made
// from that blob's child outlines, in case the top-level blob is only a
// container for the real blobs. Returns the skew gradient computed by
// compute_page_skew() over `blocks`.
165  {
166  BLOBNBOX_IT blob_it = &block->blobs;
167  TO_ROW_IT row_it = block->get_rows();
168 
169  // Include all the small blobs and large blobs.
170  blob_it.add_list_after(&block->small_blobs);
171  blob_it.add_list_after(&block->noise_blobs);
172  blob_it.add_list_after(&block->large_blobs);
173  if (block->blobs.singleton() && allow_sub_blobs) {
174  blob_it.move_to_first();
175  float size = MakeRowFromSubBlobs(block, blob_it.data()->cblob(), &row_it);
// Only ever grow line_size from the sub-blob row, never shrink it.
176  if (size > block->line_size)
177  block->line_size = size;
178  } else if (block->blobs.empty()) {
179  // Make a fake blob.
180  C_BLOB* blob = C_BLOB::FakeBlob(block->block->bounding_box());
181  // The blobnbox owns the blob.
182  BLOBNBOX* bblob = new BLOBNBOX(blob);
183  blob_it.add_after_then_move(bblob);
184  }
185  MakeRowFromBlobs(block->line_size, &blob_it, &row_it);
186  // Fit an LMS line to the rows.
187  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward())
188  fit_lms_line(row_it.data());
189  float gradient;
190  float fit_error;
191  // Compute the skew based on the fitted line.
192  compute_page_skew(blocks, gradient, fit_error);
193  return gradient;
194 }
BLOBNBOX_LIST blobs
Definition: blobbox.h:768
float MakeRowFromSubBlobs(TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it)
Definition: makerow.cpp:137
BLOCK * block
Definition: blobbox.h:773
float line_size
Definition: blobbox.h:781
void compute_page_skew(TO_BLOCK_LIST *blocks, float &page_m, float &page_err)
Definition: makerow.cpp:287
BLOBNBOX_LIST noise_blobs
Definition: blobbox.h:770
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
void fit_lms_line(TO_ROW *row)
Definition: makerow.cpp:267
static C_BLOB * FakeBlob(const TBOX &box)
Definition: stepblob.cpp:238
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
Definition: pdblock.h:59

◆ MakeRowFromSubBlobs()

float MakeRowFromSubBlobs ( TO_BLOCK *  block,
C_BLOB *  blob,
TO_ROW_IT *  row_it 
)

Definition at line 137 of file makerow.cpp.

// Makes a row from the child outlines of the first outline of `blob`.
// Each child is deep-copied into a new C_BLOB, wrapped in a BLOBNBOX and
// added to block->small_blobs; a row is then made from those blobs.
// Returns 0.0f if there are no children, otherwise whatever
// MakeRowFromBlobs() returns.
// NOTE(review): listing line 149 is absent from this extract; presumably it
// calls CheckInverseFlagAndDirection() on the new blob (it appears in the
// reference list below) - confirm against makerow.cpp.
137  {
138  // The blobs made from the children will go in the small_blobs list.
139  BLOBNBOX_IT bb_it(&block->small_blobs);
140  C_OUTLINE_IT ol_it(blob->out_list());
141  // Get the children.
142  ol_it.set_to_list(ol_it.data()->child());
143  if (ol_it.empty())
144  return 0.0f;
145  for (ol_it.mark_cycle_pt(); !ol_it.cycled_list(); ol_it.forward()) {
146  // Deep copy the child outline and use that to make a blob.
// Note: this local `blob` shadows the function parameter of the same name.
147  C_BLOB* blob = new C_BLOB(C_OUTLINE::deep_copy(ol_it.data()));
148  // Correct direction as needed.
150  BLOBNBOX* bbox = new BLOBNBOX(blob);
151  bb_it.add_after_then_move(bbox);
152  }
153  // Now we can make a row from the blobs.
154  return MakeRowFromBlobs(block->line_size, &bb_it, row_it);
155 }
float line_size
Definition: blobbox.h:781
C_OUTLINE_LIST * out_list()
Definition: stepblob.h:64
BLOBNBOX_LIST small_blobs
Definition: blobbox.h:771
void CheckInverseFlagAndDirection()
Definition: stepblob.cpp:221
static C_OUTLINE * deep_copy(const C_OUTLINE *src)
Definition: coutln.h:259

◆ mark_repeated_chars()

void mark_repeated_chars ( TO_ROW *  row)

Definition at line 2657 of file makerow.cpp.

// Marks runs of leader blobs (flow() == BTFT_LEADER) in the row as repeated
// sets. A run of at least kMinLeaderCount blobs is given a fresh 1-based set
// id via set_repeated_set(); otherwise the set id is cleared to 0. The number
// of sets found is stored on the row.
2657  {
2658  BLOBNBOX_IT box_it(row->blob_list()); // Iterator.
2659  int num_repeated_sets = 0;
2660  if (!box_it.empty()) {
2661  do {
2662  BLOBNBOX* bblob = box_it.data();
2663  int repeat_length = 1;
// A run can only start on a leader blob that is not joined to its
// predecessor and has a cblob.
2664  if (bblob->flow() == BTFT_LEADER &&
2665  !bblob->joined_to_prev() && bblob->cblob() != NULL) {
// Measure the run with a copy of the iterator so box_it stays at the start.
2666  BLOBNBOX_IT test_it(box_it);
2667  for (test_it.forward(); !test_it.at_first();) {
2668  bblob = test_it.data();
2669  if (bblob->flow() != BTFT_LEADER)
2670  break;
2671  test_it.forward();
2672  bblob = test_it.data();
// A joined blob or one without a cblob right after a leader cancels the
// whole run.
2673  if (bblob->joined_to_prev() || bblob->cblob() == NULL) {
2674  repeat_length = 0;
2675  break;
2676  }
2677  ++repeat_length;
2678  }
2679  }
2680  if (repeat_length >= kMinLeaderCount) {
2681  num_repeated_sets++;
2682  for (; repeat_length > 0; box_it.forward(), --repeat_length) {
2683  bblob = box_it.data();
2684  bblob->set_repeated_set(num_repeated_sets);
2685  }
2686  } else {
// NOTE(review): bblob may have been reassigned to a blob reached by test_it
// during the scan above, so this can clear a blob other than box_it.data() -
// confirm this is intended.
2687  bblob->set_repeated_set(0);
2688  box_it.forward();
2689  }
2690  } while (!box_it.at_first()); // until all done
2691  }
2692  row->set_num_repeated_sets(num_repeated_sets);
2693 }
C_BLOB * cblob() const
Definition: blobbox.h:253
BlobTextFlowType flow() const
Definition: blobbox.h:280
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
bool joined_to_prev() const
Definition: blobbox.h:241
void set_num_repeated_sets(int num_sets)
Definition: blobbox.h:636
const int kMinLeaderCount
Definition: makerow.cpp:107
void set_repeated_set(int set_id)
Definition: blobbox.h:250

◆ most_overlapping_row()

OVERLAP_STATE most_overlapping_row ( TO_ROW_IT *  row_it,
TO_ROW *&  best_row,
float  top,
float  bottom,
float  rowsize,
BOOL8  testing_blob 
)

Definition at line 2496 of file makerow.cpp.

// Finds the row that best overlaps the vertical span [bottom, top], starting
// at row_it's current row and scanning forward while rows still intersect
// the span. On return best_row is that row and row_it points at it.
// The result is ASSIGN by default, REJECT when two rows each overlap by at
// least rowsize - 1, and NEW_ROW when the span is not covered well enough
// (see the final test). Two rows whose combined extent fits within rowsize
// are merged as a side effect.
2503  {
2504  OVERLAP_STATE result; //result of tests
2505  float overlap; //of blob & row
2506  float bestover; //nearest row
2507  float merge_top, merge_bottom; //size of merged row
2508  ICOORD testpt; //testing only
2509  TO_ROW *row; //current row
2510  TO_ROW *test_row; //for multiple overlaps
2511  BLOBNBOX_IT blob_it; //for merging rows
2512 
2513  result = ASSIGN;
2514  row = row_it->data ();
// Overlap with the starting row: the span height minus whatever sticks out
// above max_y or below min_y.
2515  bestover = top - bottom;
2516  if (top > row->max_y ())
2517  bestover -= top - row->max_y ();
2518  if (bottom < row->min_y ())
2519  //compute overlap
2520  bestover -= row->min_y () - bottom;
2521  if (testing_blob && textord_debug_blob) {
2522  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f\n",
2523  bottom, top, row->min_y(), row->max_y(), rowsize, bestover);
2524  }
2525  test_row = row;
2526  do {
2527  if (!row_it->at_last ()) {
2528  row_it->forward ();
2529  test_row = row_it->data ();
2530  if (test_row->min_y () <= top && test_row->max_y () >= bottom) {
2531  merge_top =
2532  test_row->max_y () >
2533  row->max_y ()? test_row->max_y () : row->max_y ();
2534  merge_bottom =
2535  test_row->min_y () <
2536  row->min_y ()? test_row->min_y () : row->min_y ();
// The combined extent still fits in one row height: fold `row` into
// test_row and delete the row before it in the list.
2537  if (merge_top - merge_bottom <= rowsize) {
2538  if (testing_blob) {
2539  tprintf ("Merging rows at (%g,%g), (%g,%g)\n",
2540  row->min_y (), row->max_y (),
2541  test_row->min_y (), test_row->max_y ());
2542  }
2543  test_row->set_limits (merge_bottom, merge_top);
2544  blob_it.set_to_list (test_row->blob_list ());
2545  blob_it.add_list_after (row->blob_list ());
2546  blob_it.sort (blob_x_order);
2547  row_it->backward ();
2548  delete row_it->extract ();
2549  row_it->forward ();
// -1 is below any overlap computed next, so `row` is replaced by test_row.
2550  bestover = -1.0f; //force replacement
2551  }
2552  overlap = top - bottom;
2553  if (top > test_row->max_y ())
2554  overlap -= top - test_row->max_y ();
2555  if (bottom < test_row->min_y ())
2556  overlap -= test_row->min_y () - bottom;
// Both the best row so far and this one overlap by nearly a full row
// height: the assignment is ambiguous.
2557  if (bestover >= rowsize - 1 && overlap >= rowsize - 1) {
2558  result = REJECT;
2559  }
2560  if (overlap > bestover) {
2561  bestover = overlap; //find biggest overlap
2562  row = test_row;
2563  }
2564  if (testing_blob && textord_debug_blob) {
2565  tprintf("Test blob y=(%g,%g), row=(%f,%f), size=%g, overlap=%f->%f\n",
2566  bottom, top, test_row->min_y(), test_row->max_y(),
2567  rowsize, overlap, bestover);
2568  }
2569  }
2570  }
2571  }
2572  while (!row_it->at_last ()
2573  && test_row->min_y () <= top && test_row->max_y () >= bottom);
2574  while (row_it->data () != row)
2575  row_it->backward (); //make it point to row
2576  //doesn't overlap much
// NEW_ROW only replaces ASSIGN, never REJECT. With textord_fix_makerow_bug
// set, the best overlap must also be below rowsize * textord_overlap_x.
2577  if (top - bottom - bestover > rowsize * textord_overlap_x &&
2578  (!textord_fix_makerow_bug || bestover < rowsize * textord_overlap_x)
2579  && result == ASSIGN)
2580  result = NEW_ROW; //doesn't overlap enough
2581  best_row = row;
2582  return result;
2583 }
void set_limits(float new_min, float new_max)
Definition: blobbox.h:618
integer coordinate
Definition: points.h:30
float min_y() const
Definition: blobbox.h:557
bool textord_debug_blob
Definition: makerow.cpp:103
float max_y() const
Definition: blobbox.h:554
double textord_overlap_x
Definition: makerow.cpp:81
OVERLAP_STATE
Definition: makerow.h:29
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
int blob_x_order(const void *item1, const void *item2)
Definition: makerow.cpp:2591
Definition: makerow.h:32
#define tprintf(...)
Definition: tprintf.h:31
bool textord_fix_makerow_bug
Definition: makerow.cpp:56
Definition: makerow.h:31

◆ pre_associate_blobs()

void pre_associate_blobs ( ICOORD  page_tr,
TO_BLOCK *  block,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1862 of file makerow.cpp.

// For every row of the block, merges each blob with the following blobs as
// long as their boxes have a major x overlap, then calls chop() on the
// merged blob. When graphics are compiled in and both testing_on and
// textord_show_final_blobs are set, the rotated boxes of blobs not joined to
// their predecessor are drawn, one colour per row.
// NOTE(review): listing lines 1915-1916 (the last argument of chop()) are
// absent from this extract; presumably they compute the xheight argument
// from block->line_size, kXHeightFraction and textord_chop_width (all appear
// in the reference list below) - confirm against makerow.cpp.
1867  {
1868 #ifndef GRAPHICS_DISABLED
1869  ScrollView::Color colour; //of boxes
1870 #endif
1871  BLOBNBOX *blob; //current blob
1872  BLOBNBOX *nextblob; //next in list
1873  TBOX blob_box;
1874  FCOORD blob_rotation; //inverse of rotation
1875  BLOBNBOX_IT blob_it; //iterator
1876  BLOBNBOX_IT start_it; //iterator
1877  TO_ROW_IT row_it = block->get_rows ();
1878 
1879 #ifndef GRAPHICS_DISABLED
1880  colour = ScrollView::RED;
1881 #endif
1882 
// Same x component, negated y component.
1883  blob_rotation = FCOORD (rotation.x (), -rotation.y ());
1884  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
1885  //get blobs
1886  blob_it.set_to_list (row_it.data ()->blob_list ());
1887  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1888  blob_it.forward ()) {
1889  blob = blob_it.data ();
1890  blob_box = blob->bounding_box ();
1891  start_it = blob_it; //save start point
1892  // if (testing_on && textord_show_final_blobs)
1893  // {
1894  // tprintf("Blob at (%d,%d)->(%d,%d), addr=%x, count=%d\n",
1895  // blob_box.left(),blob_box.bottom(),
1896  // blob_box.right(),blob_box.top(),
1897  // (void*)blob,blob_it.length());
1898  // }
// Keep absorbing the next blob while it overlaps the growing box of `blob`.
1899  bool overlap;
1900  do {
1901  overlap = false;
1902  if (!blob_it.at_last ()) {
1903  nextblob = blob_it.data_relative(1);
1904  overlap = blob_box.major_x_overlap(nextblob->bounding_box());
1905  if (overlap) {
1906  blob->merge(nextblob); // merge new blob
1907  blob_box = blob->bounding_box(); // get bigger box
1908  blob_it.forward();
1909  }
1910  }
1911  }
1912  while (overlap);
// start_it marks the first blob of the merged run, blob_it the last.
1913  blob->chop (&start_it, &blob_it,
1914  blob_rotation,
1917  //attempt chop
1918  }
1919 #ifndef GRAPHICS_DISABLED
1920  if (testing_on && textord_show_final_blobs) {
1921  if (to_win == NULL)
1922  create_to_win(page_tr);
1923  to_win->Pen(colour);
1924  for (blob_it.mark_cycle_pt (); !blob_it.cycled_list ();
1925  blob_it.forward ()) {
1926  blob = blob_it.data ();
1927  blob_box = blob->bounding_box ();
1928  blob_box.rotate (rotation);
1929  if (!blob->joined_to_prev ()) {
1930  to_win->Rectangle (blob_box.left (), blob_box.bottom (),
1931  blob_box.right (), blob_box.top ());
1932  }
1933  }
// Step to the next colour for the next row, wrapping back to RED after
// MAGENTA.
1934  colour = (ScrollView::Color) (colour + 1);
1935  if (colour > ScrollView::MAGENTA)
1936  colour = ScrollView::RED;
1937  }
1938 #endif
1939  }
1940 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
void rotate(const FCOORD &vec)
Definition: rect.h:189
ScrollView * create_to_win(ICOORD page_tr)
Definition: drawtord.cpp:47
bool textord_show_final_blobs
Definition: makerow.cpp:49
float line_size
Definition: blobbox.h:781
double textord_chop_width
Definition: makerow.cpp:78
bool major_x_overlap(const TBOX &box) const
Definition: rect.h:402
inT16 bottom() const
Definition: rect.h:61
bool joined_to_prev() const
Definition: blobbox.h:241
inT16 left() const
Definition: rect.h:68
void Pen(Color color)
Definition: scrollview.cpp:726
float y() const
Definition: points.h:212
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void Rectangle(int x1, int y1, int x2, int y2)
Definition: scrollview.cpp:606
Definition: points.h:189
inT16 top() const
Definition: rect.h:54
static const double kXHeightFraction
Definition: ccstruct.h:34
float x() const
Definition: points.h:209
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void chop(BLOBNBOX_IT *start_it, BLOBNBOX_IT *blob_it, FCOORD rotation, float xheight)
Definition: blobbox.cpp:115
EXTERN ScrollView * to_win
Definition: drawtord.cpp:38
void merge(BLOBNBOX *nextblob)
Definition: blobbox.cpp:87

◆ row_spacing_order()

int row_spacing_order ( const void *  item1,
const void *  item2 
)

Definition at line 2635 of file makerow.cpp.

// Sort comparator over pointers to TO_ROW pointers: orders rows by
// ascending `spacing`. Returns -1, 1 or 0.
2637  {
2638  //converted ptr
2639  TO_ROW *row1 = *(TO_ROW **) item1;
2640  //converted ptr
2641  TO_ROW *row2 = *(TO_ROW **) item2;
2642 
2643  if (row1->spacing < row2->spacing)
2644  return -1;
2645  else if (row1->spacing > row2->spacing)
2646  return 1;
2647  else
2648  return 0;
2649 }
float spacing
Definition: blobbox.h:652

◆ row_y_order()

int row_y_order ( const void *  item1,
const void *  item2 
)

Definition at line 2613 of file makerow.cpp.

// Sort comparator over pointers to TO_ROW pointers: orders rows by
// descending parallel_c(), so the row with the larger intercept sorts first.
// Returns -1, 1 or 0.
2615  {
2616  //converted ptr
2617  TO_ROW *row1 = *(TO_ROW **) item1;
2618  //converted ptr
2619  TO_ROW *row2 = *(TO_ROW **) item2;
2620 
2621  if (row1->parallel_c () > row2->parallel_c ())
2622  return -1;
2623  else if (row1->parallel_c () < row2->parallel_c ())
2624  return 1;
2625  else
2626  return 0;
2627 }
float parallel_c() const
Definition: blobbox.h:575

◆ segment_baseline()

BOOL8 segment_baseline ( TO_ROW *  row,
TO_BLOCK *  block,
inT32 &  segments,
inT32  xstarts[] 
)

Definition at line 2106 of file makerow.cpp.

// Splits the row into baseline segments according to how far blob bottoms
// drift from the row's fitted line (line_m, line_c). A window of
// textord_spline_medianwin shifts is kept in `yshifts`; the middle value of
// that window is classified as above (1), below (-1) or on (0) the line by
// comparing it against textord_spline_shift_fraction * block->line_size.
// A new segment starts when the state changes and the current segment holds
// more than textord_spline_minblobs blobs.
// Outputs: `segments` (count) and xstarts[0..segments] (boundaries).
// Returns TRUE if any window was off the line. Returns FALSE early, with a
// single segment, when the row has too few blobs.
2111  {
2112  BOOL8 needs_curve; //needs curved line
2113  int blobcount; //no of blobs
2114  int blobindex; //current blob
2115  int last_state; //above, on , below
2116  int state; //of current blob
2117  float yshift; //from baseline
2118  TBOX box; //blob box
2119  TBOX new_box; //new_it box
2120  float middle; //xcentre of blob
2121  //blobs
2122  BLOBNBOX_IT blob_it = row->blob_list ();
2123  BLOBNBOX_IT new_it = blob_it; //front end
2124  SORTED_FLOATS yshifts; //shifts from baseline
2125 
2126  needs_curve = FALSE;
2127  box = box_next_pre_chopped (&blob_it);
2128  xstarts[0] = box.left ();
2129  segments = 1;
2130  blobcount = row->blob_list ()->length ();
2131  if (textord_oldbl_debug)
2132  tprintf ("Segmenting baseline of %d blobs at (%d,%d)\n",
2133  blobcount, box.left (), box.bottom ());
// Too few blobs to fill a window: one segment ending at the last blob's
// right edge.
2134  if (blobcount <= textord_spline_medianwin
2135  || blobcount < textord_spline_minblobs) {
2136  blob_it.move_to_last ();
2137  box = blob_it.data ()->bounding_box ();
2138  xstarts[1] = box.right ();
2139  return FALSE;
2140  }
2141  last_state = 0;
2142  new_it.mark_cycle_pt ();
// Prime the window with the first textord_spline_medianwin shifts, keyed by
// blob index.
2143  for (blobindex = 0; blobindex < textord_spline_medianwin; blobindex++) {
2144  new_box = box_next_pre_chopped (&new_it);
2145  middle = (new_box.left () + new_box.right ()) / 2.0;
2146  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2147  //record shift
2148  yshifts.add (yshift, blobindex);
2149  if (new_it.cycled_list ()) {
2150  xstarts[1] = new_box.right ();
2151  return FALSE;
2152  }
2153  }
// blobcount is reused from here on as the number of blobs in the current
// segment; blob_it is advanced by half a window.
2154  for (blobcount = 0; blobcount < textord_spline_medianwin / 2; blobcount++)
2155  box = box_next_pre_chopped (&blob_it);
2156  do {
2157  new_box = box_next_pre_chopped (&new_it);
2158  //get middle one
2159  yshift = yshifts[textord_spline_medianwin / 2];
2160  if (yshift > textord_spline_shift_fraction * block->line_size)
2161  state = 1;
2162  else if (-yshift > textord_spline_shift_fraction * block->line_size)
2163  state = -1;
2164  else
2165  state = 0;
2166  if (state != 0)
2167  needs_curve = TRUE;
2168  // tprintf("State=%d, prev=%d, shift=%g\n",
2169  // state,last_state,yshift);
2170  if (state != last_state && blobcount > textord_spline_minblobs) {
2171  xstarts[segments++] = box.left ();
2172  blobcount = 0;
2173  }
2174  last_state = state;
// Slide the window: drop the oldest shift and add the one for new_box.
2175  yshifts.remove (blobindex - textord_spline_medianwin);
2176  box = box_next_pre_chopped (&blob_it);
2177  middle = (new_box.left () + new_box.right ()) / 2.0;
2178  yshift = new_box.bottom () - row->line_m () * middle - row->line_c ();
2179  yshifts.add (yshift, blobindex);
2180  blobindex++;
2181  blobcount++;
2182  }
2183  while (!new_it.cycled_list ());
// Close the final segment. If it is too short (and is not the only one), it
// is folded into the previous segment by overwriting that segment's boundary.
2184  if (blobcount > textord_spline_minblobs || segments == 1) {
2185  xstarts[segments] = new_box.right ();
2186  }
2187  else {
2188  xstarts[--segments] = new_box.right ();
2189  }
2190  if (textord_oldbl_debug)
2191  tprintf ("Made %d segments on row at (%d,%d)\n",
2192  segments, box.right (), box.bottom ());
2193  return needs_curve;
2194 }
#define TRUE
Definition: capi.h:45
float line_size
Definition: blobbox.h:781
int textord_spline_minblobs
Definition: makerow.cpp:65
unsigned char BOOL8
Definition: host.h:46
void add(float value, inT32 key)
Definition: sortflts.cpp:28
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
float line_m() const
Definition: blobbox.h:566
float line_c() const
Definition: blobbox.h:569
inT16 bottom() const
Definition: rect.h:61
#define FALSE
Definition: capi.h:46
int textord_spline_medianwin
Definition: makerow.cpp:66
inT16 left() const
Definition: rect.h:68
EXTERN bool textord_oldbl_debug
Definition: oldbasel.cpp:39
#define tprintf(...)
Definition: tprintf.h:31
TBOX box_next_pre_chopped(BLOBNBOX_IT *it)
Definition: blobbox.cpp:660
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
void remove(inT32 key)
Definition: sortflts.cpp:53
double textord_spline_shift_fraction
Definition: makerow.cpp:71

◆ separate_underlines()

void separate_underlines ( TO_BLOCK * block,
float  gradient,
FCOORD  rotation,
BOOL8  testing_on 
)

Definition at line 1789 of file makerow.cpp.

1792  { // correct orientation
1793  BLOBNBOX *blob; // current blob
1794  C_BLOB *rotated_blob; // rotated blob
1795  TO_ROW *row; // current row
1796  float length; // of g_vec
1797  TBOX blob_box;
1798  FCOORD blob_rotation; // inverse of rotation
1799  FCOORD g_vec; // skew rotation
1800  BLOBNBOX_IT blob_it; // iterator
1801  // iterator
1802  BLOBNBOX_IT under_it = &block->underlines;
1803  BLOBNBOX_IT large_it = &block->large_blobs;
1804  TO_ROW_IT row_it = block->get_rows();
1805  int min_blob_height = static_cast<int>(textord_min_blob_height_fraction *
1806  block->line_size + 0.5);
1807 
1808  // length of vector
1809  length = sqrt(1 + gradient * gradient);
1810  g_vec = FCOORD(1 / length, -gradient / length);
1811  blob_rotation = FCOORD(rotation.x(), -rotation.y());
1812  blob_rotation.rotate(g_vec); // undoing everything
1813  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
1814  row = row_it.data();
1815  // get blobs
1816  blob_it.set_to_list(row->blob_list());
1817  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list();
1818  blob_it.forward()) {
1819  blob = blob_it.data();
1820  blob_box = blob->bounding_box();
1821  if (blob_box.width() > block->line_size * textord_underline_width) {
1822  ASSERT_HOST(blob->cblob() != NULL);
1823  rotated_blob = crotate_cblob (blob->cblob(),
1824  blob_rotation);
1825  if (test_underline(
1826  testing_on && textord_show_final_rows,
1827  rotated_blob, static_cast<inT16>(row->intercept()),
1828  static_cast<inT16>(
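1829  block->line_size *
1830  (tesseract::CCStruct::kXHeightFraction +
1831  tesseract::CCStruct::kAscenderFraction / 2.0f)))) {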
1832  under_it.add_after_then_move(blob_it.extract());
1833  if (testing_on && textord_show_final_rows) {
1834  tprintf("Underlined blob at:");
1835  rotated_blob->bounding_box().print();
1836  tprintf("Was:");
1837  blob_box.print();
1838  }
1839  } else if (CountOverlaps(blob->bounding_box(), min_blob_height,
1840  row->blob_list()) >
1841  textord_max_blob_overlaps) {
1842  large_it.add_after_then_move(blob_it.extract());
1843  if (testing_on && textord_show_final_rows) {
1844  tprintf("Large blob overlaps %d blobs at:",
1845  CountOverlaps(blob_box, min_blob_height,
1846  row->blob_list()));
1847  blob_box.print();
1848  }
1849  }
1850  delete rotated_blob;
1851  }
1852  }
1853  }
1854 }
const TBOX & bounding_box() const
Definition: blobbox.h:215
int textord_max_blob_overlaps
Definition: makerow.cpp:68
C_BLOB * cblob() const
Definition: blobbox.h:253
BOOL8 test_underline(BOOL8 testing_on, C_BLOB *blob, inT16 baseline, inT16 xheight)
Definition: blkocc.cpp:53
inT16 width() const
Definition: rect.h:111
float line_size
Definition: blobbox.h:781
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
BLOBNBOX_LIST underlines
Definition: blobbox.h:769
void rotate(const FCOORD vec)
Definition: ipoints.h:471
BLOBNBOX_LIST large_blobs
Definition: blobbox.h:772
float y() const
Definition: points.h:212
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
void print() const
Definition: rect.h:270
C_BLOB * crotate_cblob(C_BLOB *blob, FCOORD rotation)
Definition: blobbox.cpp:606
bool textord_show_final_rows
Definition: makerow.cpp:48
static const double kAscenderFraction
Definition: ccstruct.h:35
double textord_underline_width
Definition: makerow.cpp:87
#define tprintf(...)
Definition: tprintf.h:31
Definition: points.h:189
static const double kXHeightFraction
Definition: ccstruct.h:34
float x() const
Definition: points.h:209
Definition: rect.h:30
double textord_min_blob_height_fraction
Definition: makerow.cpp:89
#define ASSERT_HOST(x)
Definition: errcode.h:84
float intercept() const
Definition: blobbox.h:584

◆ vigorous_noise_removal()

void vigorous_noise_removal ( TO_BLOCK * block)

Definition at line 473 of file makerow.cpp.

473  { // Deletes small, noise-like blobs from every row of the block, sparing likely i-dots.
474  TO_ROW_IT row_it = block->get_rows ();
475  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
476  TO_ROW* row = row_it.data();
477  BLOBNBOX_IT b_it = row->blob_list();
478  // Estimate the xheight on the row: first find the tallest blob, then take
479  int max_height = 0; // the median of a height histogram (three passes over the row in total).
480  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
481  BLOBNBOX* blob = b_it.data();
482  if (blob->bounding_box().height() > max_height)
483  max_height = blob->bounding_box().height();
484  }
485  STATS hstats(0, max_height + 1); // Histogram range sized from the tallest blob found above.
486  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
487  BLOBNBOX* blob = b_it.data();
488  int height = blob->bounding_box().height();
489  if (height >= kMinSize) // Blobs shorter than kMinSize do not contribute to the estimate.
490  hstats.add(blob->bounding_box().height(), 1);
491  }
492  float xheight = hstats.median(); // Median height of the blobs that passed the kMinSize filter.
493  // Delete small objects: anything shorter than kNoiseSize * xheight is a candidate.
494  BLOBNBOX* prev = NULL; // Most recent blob in this row that was not small; NULL until one is seen.
495  for (b_it.mark_cycle_pt(); !b_it.cycled_list(); b_it.forward()) {
496  BLOBNBOX* blob = b_it.data();
497  const TBOX& box = blob->bounding_box();
498  if (box.height() < kNoiseSize * xheight) {
499  // Small so delete unless it looks like an i dot relative to a neighbour.
500  if (prev != NULL) {
501  if (dot_of_i(blob, prev, row))
502  continue; // Looks OK: kept, and prev is left unchanged.
503  }
504  if (!b_it.at_last()) { // Only look ahead when a following blob exists in the row.
505  BLOBNBOX* next = b_it.data_relative(1);
506  if (dot_of_i(blob, next, row))
507  continue; // Looks OK: kept, and prev is left unchanged.
508  }
509  // It might be noise so get rid of it: free the C_BLOB, then the list element.
510  delete blob->cblob();
511  delete b_it.extract(); // NOTE(review): assumes extract() unlinks and returns the current BLOBNBOX - confirm in elst.h.
512  } else {
513  prev = blob; // Not small: remember it as the reference for later i-dot checks.
514  }
515  }
516  }
517 }
const double kNoiseSize
Definition: makerow.cpp:383
const TBOX & bounding_box() const
Definition: blobbox.h:215
C_BLOB * cblob() const
Definition: blobbox.h:253
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
inT16 height() const
Definition: rect.h:104
Definition: rect.h:30
Definition: statistc.h:33
const int kMinSize
Definition: makerow.cpp:384

Variable Documentation

◆ kMinLeaderCount

const int kMinLeaderCount = 5

Definition at line 107 of file makerow.cpp.

◆ kMinSize

const int kMinSize = 8

Definition at line 384 of file makerow.cpp.

◆ kNoiseSize

const double kNoiseSize = 0.5

Definition at line 383 of file makerow.cpp.

◆ textord_ascheight_mode_fraction

double textord_ascheight_mode_fraction = 0.08

"Min pile height to make ascheight"

Definition at line 93 of file makerow.cpp.

◆ textord_ascx_ratio_max

double textord_ascx_ratio_max = 1.8

"Max cap/xheight"

Definition at line 97 of file makerow.cpp.

◆ textord_ascx_ratio_min

double textord_ascx_ratio_min = 1.25

"Min cap/xheight"

Definition at line 96 of file makerow.cpp.

◆ textord_biased_skewcalc

bool textord_biased_skewcalc = TRUE

"Bias skew estimates with line length"

Definition at line 58 of file makerow.cpp.

◆ textord_chop_width

double textord_chop_width = 1.5

"Max width before chopping"

Definition at line 78 of file makerow.cpp.

◆ textord_debug_blob

bool textord_debug_blob = FALSE

"Print test blob information"

Definition at line 103 of file makerow.cpp.

◆ textord_debug_xheights

bool textord_debug_xheights = FALSE

"Test xheight algorithms"

Definition at line 57 of file makerow.cpp.

◆ textord_descheight_mode_fraction

double textord_descheight_mode_fraction = 0.08

"Min pile height to make descheight"

Definition at line 95 of file makerow.cpp.

◆ textord_descx_ratio_max

double textord_descx_ratio_max = 0.6

"Max desc/xheight"

Definition at line 99 of file makerow.cpp.

◆ textord_descx_ratio_min

double textord_descx_ratio_min = 0.25

"Min desc/xheight"

Definition at line 98 of file makerow.cpp.

◆ textord_excess_blobsize

double textord_excess_blobsize = 1.3

"New row made if blob makes row this big"

Definition at line 85 of file makerow.cpp.

◆ textord_expansion_factor

double textord_expansion_factor = 1.0

"Factor to expand rows by in expand_rows"

Definition at line 80 of file makerow.cpp.

◆ textord_fix_makerow_bug

bool textord_fix_makerow_bug = TRUE

"Prevent multiple baselines"

Definition at line 56 of file makerow.cpp.

◆ textord_fix_xheight_bug

bool textord_fix_xheight_bug = TRUE

"Use spline baseline"

Definition at line 55 of file makerow.cpp.

◆ textord_heavy_nr

bool textord_heavy_nr = FALSE

"Vigorously remove noise"

Definition at line 44 of file makerow.cpp.

◆ textord_interpolating_skew

bool textord_interpolating_skew = TRUE

"Interpolate across gaps"

Definition at line 59 of file makerow.cpp.

◆ textord_linespace_iqrlimit

double textord_linespace_iqrlimit = 0.2

"Max iqr/median for linespace"

Definition at line 76 of file makerow.cpp.

◆ textord_lms_line_trials

int textord_lms_line_trials = 12

"Number of linew fits to do"

Definition at line 101 of file makerow.cpp.

◆ textord_max_blob_overlaps

int textord_max_blob_overlaps = 4

"Max number of blobs a big blob can overlap"

Definition at line 68 of file makerow.cpp.

◆ textord_min_blob_height_fraction

double textord_min_blob_height_fraction = 0.75

"Min blob height/top to include blob top into xheight stats"

Definition at line 89 of file makerow.cpp.

◆ textord_min_blobs_in_row

int textord_min_blobs_in_row = 4

"Min blobs before gradient counted"

Definition at line 64 of file makerow.cpp.

◆ textord_min_linesize

double textord_min_linesize = 1.25

"* blob height for initial linesize"

Definition at line 83 of file makerow.cpp.

◆ textord_min_xheight

int textord_min_xheight = 10

"Min credible pixel xheight"

Definition at line 69 of file makerow.cpp.

◆ textord_minxh

double textord_minxh = 0.25

"fraction of linesize for min xheight"

Definition at line 82 of file makerow.cpp.

◆ textord_new_initial_xheight

bool textord_new_initial_xheight = TRUE

"Use test xheight mechanism"

Definition at line 102 of file makerow.cpp.

◆ textord_occupancy_threshold

double textord_occupancy_threshold = 0.4

"Fraction of neighbourhood"

Definition at line 86 of file makerow.cpp.

◆ textord_old_baselines

bool textord_old_baselines = TRUE

"Use old baseline algorithm"

Definition at line 53 of file makerow.cpp.

◆ textord_old_xheight

bool textord_old_xheight = FALSE

"Use old xheight algorithm"

Definition at line 54 of file makerow.cpp.

◆ textord_overlap_x

double textord_overlap_x = 0.375

"Fraction of linespace for good overlap"

Definition at line 81 of file makerow.cpp.

◆ textord_parallel_baselines

bool textord_parallel_baselines = TRUE

"Force parallel baselines"

Definition at line 51 of file makerow.cpp.

◆ textord_show_expanded_rows

bool textord_show_expanded_rows = FALSE

"Display rows after expanding"

Definition at line 47 of file makerow.cpp.

◆ textord_show_final_blobs

bool textord_show_final_blobs = FALSE

"Display blob bounds after pre-ass"

Definition at line 49 of file makerow.cpp.

◆ textord_show_final_rows

bool textord_show_final_rows = FALSE

"Display rows after final fitting"

Definition at line 48 of file makerow.cpp.

◆ textord_show_initial_rows

bool textord_show_initial_rows = FALSE

"Display row accumulation"

Definition at line 45 of file makerow.cpp.

◆ textord_show_parallel_rows

bool textord_show_parallel_rows = FALSE

"Display page correlated rows"

Definition at line 46 of file makerow.cpp.

◆ textord_skew_ile

double textord_skew_ile = 0.5

"Ile of gradients for page skew"

Definition at line 74 of file makerow.cpp.

◆ textord_skew_lag

double textord_skew_lag = 0.02

"Lag for skew on row accumulation"

Definition at line 75 of file makerow.cpp.

◆ textord_skewsmooth_offset

int textord_skewsmooth_offset = 4

"For smooth factor"

Definition at line 60 of file makerow.cpp.

◆ textord_skewsmooth_offset2

int textord_skewsmooth_offset2 = 1

"For smooth factor"

Definition at line 61 of file makerow.cpp.

◆ textord_spline_medianwin

int textord_spline_medianwin = 6

"Size of window for spline segmentation"

Definition at line 66 of file makerow.cpp.

◆ textord_spline_minblobs

int textord_spline_minblobs = 8

"Min blobs in each spline segment"

Definition at line 65 of file makerow.cpp.

◆ textord_spline_outlier_fraction

double textord_spline_outlier_fraction = 0.1

"Fraction of line spacing for outlier"

Definition at line 73 of file makerow.cpp.

◆ textord_spline_shift_fraction

double textord_spline_shift_fraction = 0.02

"Fraction of line spacing for quad"

Definition at line 71 of file makerow.cpp.

◆ textord_straight_baselines

bool textord_straight_baselines = FALSE

"Force straight baselines"

Definition at line 52 of file makerow.cpp.

◆ textord_test_landscape

bool textord_test_landscape = FALSE

"Tests refer to land/port"

Definition at line 50 of file makerow.cpp.

◆ textord_test_x

int textord_test_x = -MAX_INT32

"coord of test pt"

Definition at line 62 of file makerow.cpp.

◆ textord_test_y

int textord_test_y = -MAX_INT32

"coord of test pt"

Definition at line 63 of file makerow.cpp.

◆ textord_underline_width

double textord_underline_width = 2.0

"Multiple of line_size for underline"

Definition at line 87 of file makerow.cpp.

◆ textord_width_limit

double textord_width_limit = 8

"Max width of blobs to make rows"

Definition at line 77 of file makerow.cpp.

◆ textord_xheight_error_margin

double textord_xheight_error_margin = 0.1

"Accepted variation"

Definition at line 100 of file makerow.cpp.

◆ textord_xheight_mode_fraction

double textord_xheight_mode_fraction = 0.4

"Min pile height to make xheight"

Definition at line 91 of file makerow.cpp.