tesseract
3.05.02
|
#include "stderr.h"
#include "blobbox.h"
#include "ccstruct.h"
#include "detlinefit.h"
#include "statistc.h"
#include "drawtord.h"
#include "blkocc.h"
#include "sortflts.h"
#include "oldbasel.h"
#include "textord.h"
#include "tordmain.h"
#include "underlin.h"
#include "makerow.h"
#include "tprintf.h"
#include "tovars.h"
Go to the source code of this file.
Namespaces | |
tesseract | |
Macros | |
#define | MAX_HEIGHT_MODES 12 |
Functions | |
float | MakeRowFromSubBlobs (TO_BLOCK *block, C_BLOB *blob, TO_ROW_IT *row_it) |
make_single_row | |
Arrange the blobs into a single row. If there is only a single blob, however, two rows are made, in case the top-level blob is a container of the real blobs to recognize. | |
float | make_single_row (ICOORD page_tr, bool allow_sub_blobs, TO_BLOCK *block, TO_BLOCK_LIST *blocks) |
make_rows | |
Arrange the blobs into rows. | |
float | make_rows (ICOORD page_tr, TO_BLOCK_LIST *port_blocks) |
make_initial_textrows | |
Arrange the good blobs into rows of text. | |
void | make_initial_textrows (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on) |
fit_lms_line | |
Fit an LMS line to a row. | |
void | fit_lms_line (TO_ROW *row) |
find_best_dropout_row | |
Delete this row if it has a neighbour with better dropout characteristics. TRUE is returned if the row should be deleted. | |
BOOL8 | find_best_dropout_row (TO_ROW *row, inT32 distance, float dist_limit, inT32 line_index, TO_ROW_IT *row_it, BOOL8 testing_on) |
deskew_block_coords | |
Compute the bounding box of all the blobs in the block if they were deskewed without actually doing it. | |
TBOX | deskew_block_coords (TO_BLOCK *block, float gradient) |
compute_line_occupation | |
Compute the pixel projection back on the y axis given the global skew. Also compute the 1st derivative. | |
void | compute_line_occupation (TO_BLOCK *block, float gradient, inT32 min_y, inT32 max_y, inT32 *occupation, inT32 *deltas) |
void | compute_occupation_threshold (inT32 low_window, inT32 high_window, inT32 line_count, inT32 *occupation, inT32 *thresholds) |
compute_dropout_distances | |
Compute the distance from each coordinate to the nearest dropout. | |
void | compute_dropout_distances (inT32 *occupation, inT32 *thresholds, inT32 line_count) |
expand_rows | |
Expand each row to the least of its allowed size and touching its neighbours. If the expansion would entirely swallow a neighbouring row then do so. | |
void | expand_rows (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
void | adjust_row_limits (TO_BLOCK *block) |
compute_row_stats | |
Compute the linespacing and offset. | |
void | compute_row_stats (TO_BLOCK *block, BOOL8 testing_on) |
fill_heights | |
Fill the given heights with heights of the blobs that are legal candidates for estimating xheight. | |
void | fill_heights (TO_ROW *row, float gradient, int min_height, int max_height, STATS *heights, STATS *floating_heights) |
compute_xheight_from_modes | |
Given a STATS object heights, looks for the two most frequently occurring heights that look like xheight and xheight + ascrise. If found, sets the values of *xheight and *ascrise accordingly; otherwise sets *xheight to any most frequently occurring height and sets *ascrise to 0. Returns the number of times xheight occurred in heights. For each mode that is considered for being an xheight, the count of floating blobs (stored in floating_heights) is subtracted from the total count of the blobs of this height. This is done because blobs that sit far above the baseline could represent valid ascenders, but it is highly unlikely that such a character's height will be an xheight (e.g. -, ', =, ^, ‘, ", ’, etc.). If cap_only, then force finding of only the top mode. | |
int | compute_xheight_from_modes (STATS *heights, STATS *floating_heights, bool cap_only, int min_height, int max_height, float *xheight, float *ascrise) |
compute_row_descdrop | |
Estimates the descdrop of this row. This function looks for "significant" descenders of lowercase letters (those that could not just be the small descenders of upper case letters like Q,J). The function also takes into account how many potential ascenders this row might contain. If the number of potential ascenders along with descenders is close to the expected fraction of the total number of blobs in the row, the function returns the descender height, returns 0 otherwise. | |
inT32 | compute_row_descdrop (TO_ROW *row, float gradient, int xheight_blob_count, STATS *asc_heights) |
compute_height_modes | |
Find the top maxmodes values in the input array and put their indices in the output in the order in which they occurred. | |
inT32 | compute_height_modes (STATS *heights, inT32 min_height, inT32 max_height, inT32 *modes, inT32 maxmodes) |
correct_row_xheight | |
Adjust the xheight etc of this row if not within reasonable limits of the average for the block. | |
void | correct_row_xheight (TO_ROW *row, float xheight, float ascrise, float descdrop) |
separate_underlines | |
Test wide objects for being potential underlines. If they are then put them in a separate list in the block. | |
void | separate_underlines (TO_BLOCK *block, float gradient, FCOORD rotation, BOOL8 testing_on) |
pre_associate_blobs | |
Associate overlapping blobs and fake chop wide blobs. | |
void | pre_associate_blobs (ICOORD page_tr, TO_BLOCK *block, FCOORD rotation, BOOL8 testing_on) |
fit_parallel_rows | |
Re-fit the rows in the block to the given gradient. | |
void | fit_parallel_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
fit_parallel_lms | |
Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly. | |
void | fit_parallel_lms (float gradient, TO_ROW *row) |
make_baseline_spline | |
Fit an LMS line to a row. Make the fit parallel to the given gradient and set the row accordingly. | |
void | make_baseline_spline (TO_ROW *row, TO_BLOCK *block) |
segment_baseline | |
Divide the baseline up into segments which require a different quadratic fitted to them. Return TRUE if enough blobs were far enough away to need a quadratic. | |
BOOL8 | segment_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[]) |
linear_spline_baseline | |
Divide the baseline up into segments which require a different quadratic fitted to them.
| |
double * | linear_spline_baseline (TO_ROW *row, TO_BLOCK *block, inT32 &segments, inT32 xstarts[]) |
assign_blobs_to_rows | |
Make enough rows to allocate all the given blobs to one. If a block skew is given, use that, else attempt to track it. | |
void | assign_blobs_to_rows (TO_BLOCK *block, float *gradient, int pass, BOOL8 reject_misses, BOOL8 make_new_rows, BOOL8 drawing_skew) |
most_overlapping_row | |
Return the row which most overlaps the blob. | |
OVERLAP_STATE | most_overlapping_row (TO_ROW_IT *row_it, TO_ROW *&best_row, float top, float bottom, float rowsize, BOOL8 testing_blob) |
blob_x_order | |
Sort function to sort blobs in x from page left. | |
int | blob_x_order (const void *item1, const void *item2) |
row_y_order | |
Sort function to sort rows in y from page top. | |
int | row_y_order (const void *item1, const void *item2) |
row_spacing_order | |
Qsort style function to compare 2 TO_ROWS based on their spacing value. | |
int | row_spacing_order (const void *item1, const void *item2) |
mark_repeated_chars | |
Mark blobs marked with BTFT_LEADER in repeated sets using the repeated_set member of BLOBNBOX. | |
void | mark_repeated_chars (TO_ROW *row) |
compute_page_skew | |
Compute the skew over a full page by averaging the gradients over all the lines. Get the error of the same row. | |
const double | kNoiseSize = 0.5 |
const int | kMinSize = 8 |
void | compute_page_skew (TO_BLOCK_LIST *blocks, float &page_m, float &page_err) |
void | vigorous_noise_removal (TO_BLOCK *block) |
void | cleanup_rows_making (ICOORD page_tr, TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
void | delete_non_dropout_rows (TO_BLOCK *block, float gradient, FCOORD rotation, inT32 block_edge, BOOL8 testing_on) |
#define MAX_HEIGHT_MODES 12 |
Definition at line 105 of file makerow.cpp.
void adjust_row_limits | ( | TO_BLOCK * | block | ) |
adjust_row_limits
Change the limits of rows to suit the default fractions.
Definition at line 1120 of file makerow.cpp.
void assign_blobs_to_rows | ( | TO_BLOCK * | block, |
float * | gradient, | ||
int | pass, | ||
BOOL8 | reject_misses, | ||
BOOL8 | make_new_rows, | ||
BOOL8 | drawing_skew | ||
) |
Definition at line 2296 of file makerow.cpp.
int blob_x_order | ( | const void * | item1, |
const void * | item2 | ||
) |
Definition at line 2591 of file makerow.cpp.
void cleanup_rows_making | ( | ICOORD | page_tr, |
TO_BLOCK * | block, | ||
float | gradient, | ||
FCOORD | rotation, | ||
inT32 | block_edge, | ||
BOOL8 | testing_on | ||
) |
cleanup_rows_making
Remove overlapping rows and fit all the blobs to what's left.
Definition at line 524 of file makerow.cpp.
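void compute_dropout_distances | ( | inT32 * | occupation, |
inT32 * | thresholds, | ||
inT32 | line_count | ||
) |
Definition at line 915 of file makerow.cpp.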
inT32 compute_height_modes | ( | STATS * | heights, |
inT32 | min_height, | ||
inT32 | max_height, | ||
inT32 * | modes, | ||
inT32 | maxmodes | ||
) |
Definition at line 1640 of file makerow.cpp.
void compute_line_occupation | ( | TO_BLOCK * | block, |
float | gradient, | ||
inT32 | min_y, | ||
inT32 | max_y, | ||
inT32 * | occupation, | ||
inT32 * | deltas | ||
) |
Definition at line 781 of file makerow.cpp.
void compute_occupation_threshold | ( | inT32 | low_window, |
inT32 | high_window, | ||
inT32 | line_count, | ||
inT32 * | occupation, | ||
inT32 * | thresholds | ||
) |
compute_occupation_threshold
Compute thresholds for textline or not for the occupation array.
Definition at line 834 of file makerow.cpp.
void compute_page_skew | ( | TO_BLOCK_LIST * | blocks, |
float & | page_m, | ||
float & | page_err | ||
) |
Definition at line 287 of file makerow.cpp.
inT32 compute_row_descdrop | ( | TO_ROW * | row, |
float | gradient, | ||
int | xheight_blob_count, | ||
STATS * | asc_heights | ||
) |
Definition at line 1580 of file makerow.cpp.
void compute_row_stats | ( | TO_BLOCK * | block, |
BOOL8 | testing_on | ||
) |
Definition at line 1156 of file makerow.cpp.
int compute_xheight_from_modes | ( | STATS * | heights, |
STATS * | floating_heights, | ||
bool | cap_only, | ||
int | min_height, | ||
int | max_height, | ||
float * | xheight, | ||
float * | ascrise | ||
) |
Definition at line 1484 of file makerow.cpp.
void correct_row_xheight | ( | TO_ROW * | row, |
float | xheight, | ||
float | ascrise, | ||
float | descdrop | ||
) |
Definition at line 1702 of file makerow.cpp.
void delete_non_dropout_rows | ( | TO_BLOCK * | block, |
float | gradient, | ||
FCOORD | rotation, | ||
inT32 | block_edge, | ||
BOOL8 | testing_on | ||
) |
delete_non_dropout_rows
Compute the linespacing and offset.
Definition at line 577 of file makerow.cpp.
TBOX deskew_block_coords | ( | TO_BLOCK * | block, |
float | gradient | ||
) |
Definition at line 745 of file makerow.cpp.
void expand_rows | ( | ICOORD | page_tr, |
TO_BLOCK * | block, | ||
float | gradient, | ||
FCOORD | rotation, | ||
inT32 | block_edge, | ||
BOOL8 | testing_on | ||
) |
Definition at line 962 of file makerow.cpp.
void fill_heights | ( | TO_ROW * | row, |
float | gradient, | ||
int | min_height, | ||
int | max_height, | ||
STATS * | heights, | ||
STATS * | floating_heights | ||
) |
Definition at line 1423 of file makerow.cpp.
BOOL8 find_best_dropout_row | ( | TO_ROW * | row, |
inT32 | distance, | ||
float | dist_limit, | ||
inT32 | line_index, | ||
TO_ROW_IT * | row_it, | ||
BOOL8 | testing_on | ||
) |
Definition at line 665 of file makerow.cpp.
void fit_lms_line | ( | TO_ROW * | row | ) |
Definition at line 267 of file makerow.cpp.
void fit_parallel_lms | ( | float | gradient, |
TO_ROW * | row | ||
) |
Definition at line 1990 of file makerow.cpp.
void fit_parallel_rows | ( | TO_BLOCK * | block, |
float | gradient, | ||
FCOORD | rotation, | ||
inT32 | block_edge, | ||
BOOL8 | testing_on | ||
) |
Definition at line 1948 of file makerow.cpp.
double* linear_spline_baseline | ( | TO_ROW * | row, |
TO_BLOCK * | block, | ||
inT32 & | segments, | ||
inT32 | xstarts[] | ||
) |
Definition at line 2205 of file makerow.cpp.
void make_baseline_spline | ( | TO_ROW * | row, |
TO_BLOCK * | block | ||
) |
Definition at line 2073 of file makerow.cpp.
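void make_initial_textrows | ( | ICOORD | page_tr, |
TO_BLOCK * | block, | ||
FCOORD | rotation, | ||
BOOL8 | testing_on | ||
) |
Definition at line 227 of file makerow.cpp.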
float make_rows | ( | ICOORD | page_tr, |
TO_BLOCK_LIST * | port_blocks | ||
) |
Definition at line 201 of file makerow.cpp.
float make_single_row | ( | ICOORD | page_tr, |
bool | allow_sub_blobs, | ||
TO_BLOCK * | block, | ||
TO_BLOCK_LIST * | blocks | ||
) |
Definition at line 164 of file makerow.cpp.
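float MakeRowFromSubBlobs | ( | TO_BLOCK * | block, |
C_BLOB * | blob, | ||
TO_ROW_IT * | row_it | ||
) |
Definition at line 137 of file makerow.cpp.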
void mark_repeated_chars | ( | TO_ROW * | row | ) |
Definition at line 2657 of file makerow.cpp.
OVERLAP_STATE most_overlapping_row | ( | TO_ROW_IT * | row_it, |
TO_ROW *& | best_row, | ||
float | top, | ||
float | bottom, | ||
float | rowsize, | ||
BOOL8 | testing_blob | ||
) |
Definition at line 2496 of file makerow.cpp.
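void pre_associate_blobs | ( | ICOORD | page_tr, |
TO_BLOCK * | block, | ||
FCOORD | rotation, | ||
BOOL8 | testing_on | ||
) |
Definition at line 1862 of file makerow.cpp.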
int row_spacing_order | ( | const void * | item1, |
const void * | item2 | ||
) |
Definition at line 2635 of file makerow.cpp.
int row_y_order | ( | const void * | item1, |
const void * | item2 | ||
) |
Definition at line 2613 of file makerow.cpp.
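BOOL8 segment_baseline | ( | TO_ROW * | row, |
TO_BLOCK * | block, | ||
inT32 & | segments, | ||
inT32 | xstarts[] | ||
) |
Definition at line 2106 of file makerow.cpp.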
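void separate_underlines | ( | TO_BLOCK * | block, |
float | gradient, | ||
FCOORD | rotation, | ||
BOOL8 | testing_on | ||
) |
Definition at line 1789 of file makerow.cpp.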
void vigorous_noise_removal | ( | TO_BLOCK * | block | ) |
Definition at line 473 of file makerow.cpp.
const int kMinLeaderCount = 5 |
Definition at line 107 of file makerow.cpp.
const int kMinSize = 8 |
Definition at line 384 of file makerow.cpp.
const double kNoiseSize = 0.5 |
Definition at line 383 of file makerow.cpp.
double textord_ascheight_mode_fraction = 0.08 |
"Min pile height to make ascheight"
Definition at line 93 of file makerow.cpp.
double textord_ascx_ratio_max = 1.8 |
"Max cap/xheight"
Definition at line 97 of file makerow.cpp.
double textord_ascx_ratio_min = 1.25 |
"Min cap/xheight"
Definition at line 96 of file makerow.cpp.
bool textord_biased_skewcalc = TRUE |
"Bias skew estimates with line length"
Definition at line 58 of file makerow.cpp.
double textord_chop_width = 1.5 |
"Max width before chopping"
Definition at line 78 of file makerow.cpp.
bool textord_debug_blob = FALSE |
"Print test blob information"
Definition at line 103 of file makerow.cpp.
bool textord_debug_xheights = FALSE |
"Test xheight algorithms"
Definition at line 57 of file makerow.cpp.
double textord_descheight_mode_fraction = 0.08 |
"Min pile height to make descheight"
Definition at line 95 of file makerow.cpp.
double textord_descx_ratio_max = 0.6 |
"Max desc/xheight"
Definition at line 99 of file makerow.cpp.
double textord_descx_ratio_min = 0.25 |
"Min desc/xheight"
Definition at line 98 of file makerow.cpp.
double textord_excess_blobsize = 1.3 |
"New row made if blob makes row this big"
Definition at line 85 of file makerow.cpp.
double textord_expansion_factor = 1.0 |
"Factor to expand rows by in expand_rows"
Definition at line 80 of file makerow.cpp.
bool textord_fix_makerow_bug = TRUE |
"Prevent multiple baselines"
Definition at line 56 of file makerow.cpp.
bool textord_fix_xheight_bug = TRUE |
"Use spline baseline"
Definition at line 55 of file makerow.cpp.
bool textord_heavy_nr = FALSE |
"Vigorously remove noise"
Definition at line 44 of file makerow.cpp.
bool textord_interpolating_skew = TRUE |
"Interpolate across gaps"
Definition at line 59 of file makerow.cpp.
double textord_linespace_iqrlimit = 0.2 |
"Max iqr/median for linespace"
Definition at line 76 of file makerow.cpp.
int textord_lms_line_trials = 12 |
"Number of linew fits to do"
Definition at line 101 of file makerow.cpp.
int textord_max_blob_overlaps = 4 |
"Max number of blobs a big blob can overlap"
Definition at line 68 of file makerow.cpp.
double textord_min_blob_height_fraction = 0.75 |
"Min blob height/top to include blob top into xheight stats"
Definition at line 89 of file makerow.cpp.
int textord_min_blobs_in_row = 4 |
"Min blobs before gradient counted"
Definition at line 64 of file makerow.cpp.
double textord_min_linesize = 1.25 |
"* blob height for initial linesize"
Definition at line 83 of file makerow.cpp.
int textord_min_xheight = 10 |
"Min credible pixel xheight"
Definition at line 69 of file makerow.cpp.
double textord_minxh = 0.25 |
"fraction of linesize for min xheight"
Definition at line 82 of file makerow.cpp.
bool textord_new_initial_xheight = TRUE |
"Use test xheight mechanism"
Definition at line 102 of file makerow.cpp.
double textord_occupancy_threshold = 0.4 |
"Fraction of neighbourhood"
Definition at line 86 of file makerow.cpp.
bool textord_old_baselines = TRUE |
"Use old baseline algorithm"
Definition at line 53 of file makerow.cpp.
bool textord_old_xheight = FALSE |
"Use old xheight algorithm"
Definition at line 54 of file makerow.cpp.
double textord_overlap_x = 0.375 |
"Fraction of linespace for good overlap"
Definition at line 81 of file makerow.cpp.
bool textord_parallel_baselines = TRUE |
"Force parallel baselines"
Definition at line 51 of file makerow.cpp.
bool textord_show_expanded_rows = FALSE |
"Display rows after expanding"
Definition at line 47 of file makerow.cpp.
bool textord_show_final_blobs = FALSE |
"Display blob bounds after pre-ass"
Definition at line 49 of file makerow.cpp.
bool textord_show_final_rows = FALSE |
"Display rows after final fitting"
Definition at line 48 of file makerow.cpp.
bool textord_show_initial_rows = FALSE |
"Display row accumulation"
Definition at line 45 of file makerow.cpp.
bool textord_show_parallel_rows = FALSE |
"Display page correlated rows"
Definition at line 46 of file makerow.cpp.
double textord_skew_ile = 0.5 |
"Ile of gradients for page skew"
Definition at line 74 of file makerow.cpp.
double textord_skew_lag = 0.02 |
"Lag for skew on row accumulation"
Definition at line 75 of file makerow.cpp.
int textord_skewsmooth_offset = 4 |
"For smooth factor"
Definition at line 60 of file makerow.cpp.
int textord_skewsmooth_offset2 = 1 |
"For smooth factor"
Definition at line 61 of file makerow.cpp.
int textord_spline_medianwin = 6 |
"Size of window for spline segmentation"
Definition at line 66 of file makerow.cpp.
int textord_spline_minblobs = 8 |
"Min blobs in each spline segment"
Definition at line 65 of file makerow.cpp.
double textord_spline_outlier_fraction = 0.1 |
"Fraction of line spacing for outlier"
Definition at line 73 of file makerow.cpp.
double textord_spline_shift_fraction = 0.02 |
"Fraction of line spacing for quad"
Definition at line 71 of file makerow.cpp.
bool textord_straight_baselines = FALSE |
"Force straight baselines"
Definition at line 52 of file makerow.cpp.
bool textord_test_landscape = FALSE |
"Tests refer to land/port"
Definition at line 50 of file makerow.cpp.
int textord_test_x = -MAX_INT32 |
"coord of test pt"
Definition at line 62 of file makerow.cpp.
int textord_test_y = -MAX_INT32 |
"coord of test pt"
Definition at line 63 of file makerow.cpp.
double textord_underline_width = 2.0 |
"Multiple of line_size for underline"
Definition at line 87 of file makerow.cpp.
double textord_width_limit = 8 |
"Max width of blobs to make rows"
Definition at line 77 of file makerow.cpp.
double textord_xheight_error_margin = 0.1 |
"Accepted variation"
Definition at line 100 of file makerow.cpp.
double textord_xheight_mode_fraction = 0.4 |
"Min pile height to make xheight"
Definition at line 91 of file makerow.cpp.