38 "Use the new outline complexity module");
40 "Max number of children inside a character outline");
42 "Max layers of nested children inside a character outline");
44 "turn on debugging for this module");
48 "Importance ratio for chucking outlines");
50 "Max holes allowed in blob");
52 "Remove boxy parents of char-like children");
54 "Min pixels for potential char in box");
56 "Max lensq/area for acceptable child outline");
58 "Min area fraction of child outline");
60 "Min area fraction of grandchild for box");
70 ICOORD tright): bl(bleft), tr(tright) {
74 buckets =
new C_OUTLINE_LIST[bxdim * bydim];
121 inT16 xindex, yindex;
124 inT32 grandchild_count;
125 C_OUTLINE_IT child_it;
133 grandchild_count = 0;
135 return max_count + depth;
137 for (yindex = ymin; yindex <= ymax; yindex++) {
138 for (xindex = xmin; xindex <= xmax; xindex++) {
139 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
140 if (child_it.empty())
142 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
143 child_it.forward()) {
144 child = child_it.data();
145 if (child == outline || !(*child < *outline))
151 tprintf(
"Discard outline on child_count=%d > " 152 "max_children_per_outline=%d\n",
155 return max_count + child_count;
159 inT32 remaining_count = max_count - child_count - grandchild_count;
160 if (remaining_count > 0)
163 if (child_count + grandchild_count > max_count) {
165 tprintf(
"Disgard outline on child_count=%d + grandchild_count=%d " 167 child_count, grandchild_count, max_count);
168 return child_count + grandchild_count;
173 return child_count + grandchild_count;
190 inT16 xindex, yindex;
193 inT32 grandchild_count;
199 C_OUTLINE_IT child_it;
207 grandchild_count = 0;
211 for (yindex = ymin; yindex <= ymax; yindex++) {
212 for (xindex = xmin; xindex <= xmax; xindex++) {
213 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
214 if (child_it.empty())
216 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
217 child_it.forward()) {
218 child = child_it.data();
219 if (child != outline && *child < *outline) {
221 if (child_count <= max_count) {
222 int max_grand =(max_count - child_count) /
230 if (child_count + grandchild_count > max_count) {
232 tprintf(
"Discarding parent with child count=%d, gc=%d\n",
233 child_count,grandchild_count);
234 return child_count + grandchild_count;
236 if (parent_area == 0) {
239 parent_area = -parent_area;
241 if (parent_area < max_parent_area)
249 child_area = -child_area;
251 if (parent_area - child_area < max_parent_area) {
255 if (grandchild_count > 0) {
257 tprintf(
"Discarding parent of area %d, child area=%d, max%g " 259 parent_area, child_area, max_parent_area,
261 return max_count + 1;
264 if (child_length * child_length >
267 tprintf(
"Discarding parent of area %d, child area=%d, max%g " 268 "with child length=%d\n",
269 parent_area, child_area, max_parent_area,
271 return max_count + 1;
276 tprintf(
"Discarding parent of area %d, child area=%d, max%g " 277 "with child rect=%d\n",
278 parent_area, child_area, max_parent_area,
280 return max_count + 1;
287 return child_count + grandchild_count;
305 inT16 xindex, yindex;
307 C_OUTLINE_IT child_it;
314 for (yindex = ymin; yindex <= ymax; yindex++) {
315 for (xindex = xmin; xindex <= xmax; xindex++) {
316 child_it.set_to_list(&buckets[yindex * bxdim + xindex]);
317 for (child_it.mark_cycle_pt(); !child_it.cycled_list();
318 child_it.forward()) {
319 if (*child_it.data() < *outline) {
320 it->add_after_then_move(child_it.extract());
336 C_OUTLINE_LIST outlines;
337 C_OUTLINE_IT out_it = &outlines;
358 C_OUTLINE_LIST *outlines) {
374 C_OUTLINE_LIST *outlines,
378 C_OUTLINE_IT out_it = outlines;
379 C_OUTLINE_IT bucket_it;
382 for (out_it.mark_cycle_pt(); !out_it.cycled_list(); out_it.forward()) {
383 outline = out_it.extract();
386 bucket_it.set_to_list((*buckets) (ol_box.
left(), ol_box.
bottom()));
387 bucket_it.add_to_end(outline);
403 C_OUTLINE_LIST outlines;
405 C_OUTLINE_IT out_it = &outlines;
406 C_OUTLINE_IT bucket_it = buckets->
start_scan();
407 C_OUTLINE_IT parent_it;
408 C_BLOB_IT good_blobs = block->
blob_list();
411 while (!bucket_it.empty()) {
412 out_it.set_to_list(&outlines);
414 parent_it = bucket_it;
417 }
while (!bucket_it.at_first() &&
418 !(*parent_it.data() < *bucket_it.data()));
419 }
while (!bucket_it.at_first());
422 out_it.add_after_then_move(parent_it.extract());
427 bucket_it.set_to_list(buckets->
scan_next());
442 C_BLOB_IT *reject_it,
443 C_OUTLINE_IT *blob_it
448 outline = blob_it->data();
C_OUTLINE_LIST * start_scan()
EXTERN double edges_boxarea
EXTERN int edges_children_per_grandchild
EXTERN bool edges_use_new_outline_complexity
EXTERN double edges_childarea
void block_edges(Pix *t_pix, PDBLK *block, C_OUTLINE_IT *outline_it)
OL_BUCKETS(ICOORD bleft, ICOORD tright)
void outlines_to_blobs(BLOCK *block, ICOORD bleft, ICOORD tright, C_OUTLINE_LIST *outlines)
EXTERN int edges_max_children_layers
C_BLOB_LIST * reject_blobs()
inT32 outline_complexity(C_OUTLINE *outline, inT32 max_count, inT16 depth)
void extract_children(C_OUTLINE *outline, C_OUTLINE_IT *it)
EXTERN int edges_min_nonhole
EXTERN int edges_children_count_limit
EXTERN int edges_patharea_ratio
inT32 count_children(C_OUTLINE *outline, inT32 max_count)
inT16 x() const
access function
void extract_edges(Pix *pix, BLOCK *block)
const TBOX & bounding_box() const
void fill_buckets(C_OUTLINE_LIST *outlines, OL_BUCKETS *buckets)
C_OUTLINE_LIST * operator()(inT16 x, inT16 y)
C_OUTLINE_LIST * scan_next()
EXTERN int edges_max_children_per_outline
void empty_buckets(BLOCK *block, OL_BUCKETS *buckets)
C_BLOB_LIST * blob_list()
get blobs
#define double_VAR(name, val, comment)
EXTERN bool edges_children_fix
#define BOOL_VAR(name, val, comment)
static void ConstructBlobsFromOutlines(bool good_blob, C_OUTLINE_LIST *outline_list, C_BLOB_IT *good_blobs_it, C_BLOB_IT *bad_blobs_it)
BOOL8 capture_children(OL_BUCKETS *buckets, C_BLOB_IT *reject_it, C_OUTLINE_IT *blob_it)
void bounding_box(ICOORD &bottom_left, ICOORD &top_right) const
get box
#define INT_VAR(name, val, comment)
inT16 y() const
access_function