21 #pragma warning(disable:4244) // Conversion warnings 115 :
BlobGrid(gridsize, bleft, tright), nontext_map_(NULL), projection_(NULL),
116 denorm_(NULL), grid_box_(bleft, tright), rerotation_(1.0f, 0.0f) {
119 initial_widths_win_ = NULL;
121 diacritics_win_ = NULL;
122 textlines_win_ = NULL;
123 smoothed_win_ = NULL;
127 if (widths_win_ != NULL) {
128 #ifndef GRAPHICS_DISABLED 130 #endif // GRAPHICS_DISABLED 136 delete initial_widths_win_;
138 delete textlines_win_;
139 delete smoothed_win_;
140 delete diacritics_win_;
149 BLOBNBOX_IT blob_it(&block->
blobs);
150 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
151 SetNeighbours(
false,
false, blob_it.data());
164 InsertBlobs(input_block);
166 while (cjk_merge && FixBrokenCJK(input_block));
168 FindTextlineFlowDirection(pageseg_mode,
false);
174 static void CollectHorizVertBlobs(BLOBNBOX_LIST* input_blobs,
175 int* num_vertical_blobs,
176 int* num_horizontal_blobs,
177 BLOBNBOX_CLIST* vertical_blobs,
178 BLOBNBOX_CLIST* horizontal_blobs,
179 BLOBNBOX_CLIST* nondescript_blobs) {
180 BLOBNBOX_C_IT v_it(vertical_blobs);
181 BLOBNBOX_C_IT h_it(horizontal_blobs);
182 BLOBNBOX_C_IT n_it(nondescript_blobs);
183 BLOBNBOX_IT blob_it(input_blobs);
184 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
187 float y_x =
static_cast<float>(box.
height()) / box.
width();
188 float x_y = 1.0f / y_x;
190 float ratio = x_y > y_x ? x_y : y_x;
194 ++*num_vertical_blobs;
195 if (ok_blob) v_it.add_after_then_move(blob);
197 ++*num_horizontal_blobs;
198 if (ok_blob) h_it.add_after_then_move(blob);
199 }
else if (ok_blob) {
200 n_it.add_after_then_move(blob);
214 BLOBNBOX_CLIST* osd_blobs) {
215 int vertical_boxes = 0;
216 int horizontal_boxes = 0;
218 BLOBNBOX_CLIST vertical_blobs;
219 BLOBNBOX_CLIST horizontal_blobs;
220 BLOBNBOX_CLIST nondescript_blobs;
221 CollectHorizVertBlobs(&block->
blobs, &vertical_boxes, &horizontal_boxes,
222 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
223 CollectHorizVertBlobs(&block->
large_blobs, &vertical_boxes, &horizontal_boxes,
224 &vertical_blobs, &horizontal_blobs, &nondescript_blobs);
226 tprintf(
"TextDir hbox=%d vs vbox=%d, %dH, %dV, %dN osd blobs\n",
227 horizontal_boxes, vertical_boxes,
228 horizontal_blobs.length(), vertical_blobs.length(),
229 nondescript_blobs.length());
230 if (osd_blobs != NULL && vertical_boxes == 0 && horizontal_boxes == 0) {
232 BLOBNBOX_C_IT osd_it(osd_blobs);
233 osd_it.add_list_after(&nondescript_blobs);
236 int min_vert_boxes =
static_cast<int>((vertical_boxes + horizontal_boxes) *
237 find_vertical_text_ratio);
238 if (vertical_boxes >= min_vert_boxes) {
239 if (osd_blobs != NULL) {
240 BLOBNBOX_C_IT osd_it(osd_blobs);
241 osd_it.add_list_after(&vertical_blobs);
245 if (osd_blobs != NULL) {
246 BLOBNBOX_C_IT osd_it(osd_blobs);
247 osd_it.add_list_after(&horizontal_blobs);
258 rerotation_.
set_x(rotation.
x());
259 rerotation_.
set_y(-rotation.
y());
267 ColPartition_LIST leader_parts;
268 FindLeadersAndMarkNoise(block, &leader_parts);
272 for (ColPartition_IT it(&leader_parts); !it.empty(); it.forward()) {
275 MarkLeaderNeighbours(part,
LR_LEFT);
276 MarkLeaderNeighbours(part,
LR_RIGHT);
297 TBOX search_box = box;
298 search_box.
pad(padding, padding);
305 rsearch.StartRectSearch(search_box);
306 while ((n = rsearch.NextRectSearch()) != NULL) {
307 if (n == bbox)
continue;
309 if (nbox.
height() > max_size) {
314 tprintf(
"Max neighbour size=%d for candidate line box at:", max_size);
318 #ifndef GRAPHICS_DISABLED 319 if (leaders_win_ != NULL) {
326 #endif // GRAPHICS_DISABLED 349 Pix* nontext_pix,
const DENORM* denorm,
bool cjk_script,
352 nontext_map_ = nontext_pix;
353 projection_ = projection;
364 FindTextlineFlowDirection(pageseg_mode,
false);
377 FindTextlineFlowDirection(pageseg_mode,
true);
379 FindInitialPartitions(pageseg_mode, rerotation,
true, block,
380 diacritic_blobs, part_grid, big_parts, &skew);
382 tprintf(
"Detected %d diacritics\n", diacritic_blobs->length());
386 FindTextlineFlowDirection(pageseg_mode,
true);
387 r = FindInitialPartitions(pageseg_mode, rerotation,
false, block,
388 diacritic_blobs, part_grid, big_parts, &skew);
395 static void PrintBoxWidths(
BLOBNBOX* neighbour) {
// Print every width field with one decimal place ("%.1f"); "%1.f" would mean
// minimum width 1 with precision 0 and drop the fractional digit.
397 tprintf(
"Box (%d,%d)->(%d,%d): h-width=%.1f, v-width=%.1f p-width=%.1f\n",
410 FCOORD click(static_cast<float>(x), static_cast<float>(y));
414 PrintBoxWidths(neighbour);
425 tprintf(
"Left gap=%d, right=%d, above=%d, below=%d, horz=%d, vert=%d\n" 426 "Good= %d %d %d %d\n",
449 void StrokeWidth::FindLeadersAndMarkNoise(
TO_BLOCK* block,
450 ColPartition_LIST* leader_parts) {
456 gsearch.StartFullSearch();
457 while ((bbox = gsearch.NextFullSearch()) != NULL) {
458 SetNeighbours(
true,
false, bbox);
460 ColPartition_IT part_it(leader_parts);
461 gsearch.StartFullSearch();
462 while ((bbox = gsearch.NextFullSearch()) != NULL) {
477 if (part->MarkAsLeaderIfMonospaced())
478 part_it.add_after_then_move(part);
484 leaders_win_ = DisplayGoodBlobs(
"LeaderNeighbours", 0, 0);
488 BLOBNBOX_IT blob_it(&block->
blobs);
490 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
496 blob_it.add_to_end(small_it.extract());
503 for (noise_it.mark_cycle_pt(); !noise_it.cycled_list(); noise_it.forward()) {
506 small_it.add_to_end(noise_it.extract());
518 void StrokeWidth::InsertBlobs(
TO_BLOCK* block) {
526 void StrokeWidth::MarkLeaderNeighbours(
const ColPartition* part,
528 const TBOX& part_box = part->bounding_box();
533 blobsearch.StartSideSearch(side ==
LR_LEFT ? part_box.
left()
537 while ((blob = blobsearch.NextSideSearch(side ==
LR_LEFT)) != NULL) {
541 int x_gap = blob_box.
x_gap(part_box);
544 }
else if (best_blob == NULL || x_gap < best_gap) {
549 if (best_blob != NULL) {
554 #ifndef GRAPHICS_DISABLED 555 if (leaders_win_ != NULL) {
561 #endif // GRAPHICS_DISABLED 566 static int UpperQuartileCJKSize(
int gridsize, BLOBNBOX_LIST* blobs) {
568 BLOBNBOX_IT it(blobs);
569 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
574 sizes.add(height, 1);
576 return static_cast<int>(sizes.ile(0.75f) + 0.5);
584 bool StrokeWidth::FixBrokenCJK(
TO_BLOCK* block) {
585 BLOBNBOX_LIST* blobs = &block->
blobs;
586 int median_height = UpperQuartileCJKSize(
gridsize(), blobs);
590 BLOBNBOX_IT blob_it(blobs);
592 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
600 tprintf(
"Checking for Broken CJK (max size=%d):", max_size);
604 BLOBNBOX_CLIST overlapped_blobs;
605 AccumulateOverlaps(blob, debug, max_size, max_dist,
606 &bbox, &overlapped_blobs);
607 if (!overlapped_blobs.empty()) {
614 tprintf(
"Bad final aspectratio:");
622 tprintf(
"Too many neighbours: %d\n", overlapped_blobs.length());
626 BLOBNBOX_C_IT n_it(&overlapped_blobs);
627 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
629 neighbour = n_it.data();
634 if (!n_it.cycled_list()) {
637 PrintBoxWidths(blob);
647 for (n_it.mark_cycle_pt(); !n_it.cycled_list(); n_it.forward()) {
653 if (rerotation_.
x() != 1.0f || rerotation_.
y() != 0.0f) {
666 int num_remaining = 0;
667 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
684 static bool AcceptableCJKMerge(
const TBOX& bbox,
const TBOX& nbox,
685 bool debug,
int max_size,
int max_dist,
686 int* x_gap,
int* y_gap) {
687 *x_gap = bbox.
x_gap(nbox);
688 *y_gap = bbox.
y_gap(nbox);
692 tprintf(
"gaps = %d, %d, merged_box:", *x_gap, *y_gap);
695 if (*x_gap <= max_dist && *y_gap <= max_dist &&
696 merged.width() <= max_size && merged.height() <= max_size) {
698 double old_ratio =
static_cast<double>(bbox.
width()) / bbox.
height();
699 if (old_ratio < 1.0) old_ratio = 1.0 / old_ratio;
700 double new_ratio =
static_cast<double>(merged.width()) / merged.height();
701 if (new_ratio < 1.0) new_ratio = 1.0 / new_ratio;
712 void StrokeWidth::AccumulateOverlaps(
const BLOBNBOX* not_this,
bool debug,
713 int max_size,
int max_dist,
714 TBOX* bbox, BLOBNBOX_CLIST* blobs) {
723 int x = (bbox->
left() + bbox->
right()) / 2;
724 int y = (bbox->
bottom() + bbox->
top()) / 2;
729 while ((neighbour = radsearch.NextRadSearch()) != NULL) {
730 if (neighbour == not_this)
continue;
733 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size, max_dist,
737 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
743 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
744 if (nearests[dir] == NULL)
continue;
746 if (AcceptableCJKMerge(*bbox, nbox, debug, max_size,
747 max_dist, &x_gap, &y_gap)) {
750 blobs->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, nearests[dir]);
755 nearests[dir] = NULL;
759 }
else if (x_gap < 0 && x_gap <= y_gap) {
762 if (nearests[dir] == NULL ||
763 y_gap < bbox->y_gap(nearests[dir]->bounding_box())) {
764 nearests[dir] = neighbour;
766 }
else if (y_gap < 0 && y_gap <= x_gap) {
769 if (nearests[dir] == NULL ||
770 x_gap < bbox->x_gap(nearests[dir]->bounding_box())) {
771 nearests[dir] = neighbour;
780 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
781 if (nearests[dir] == NULL)
continue;
784 tprintf(
"Testing for overlap with:");
788 blobs->shallow_clear();
790 tprintf(
"Final box overlaps nearest\n");
803 void StrokeWidth::FindTextlineFlowDirection(
PageSegMode pageseg_mode,
804 bool display_if_debugging) {
808 gsearch.StartFullSearch();
809 while ((bbox = gsearch.NextFullSearch()) != NULL) {
810 SetNeighbours(
false, display_if_debugging, bbox);
813 gsearch.StartFullSearch();
814 while ((bbox = gsearch.NextFullSearch()) != NULL) {
815 SimplifyObviousNeighbours(bbox);
818 gsearch.StartFullSearch();
819 while ((bbox = gsearch.NextFullSearch()) != NULL) {
820 if (FindingVerticalOnly(pageseg_mode)) {
823 }
else if (FindingHorizontalOnly(pageseg_mode)) {
827 SetNeighbourFlows(bbox);
832 initial_widths_win_ = DisplayGoodBlobs(
"InitialStrokewidths", 400, 0);
835 gsearch.StartFullSearch();
836 while ((bbox = gsearch.NextFullSearch()) != NULL) {
837 SmoothNeighbourTypes(pageseg_mode,
false, bbox);
840 gsearch.StartFullSearch();
841 while ((bbox = gsearch.NextFullSearch()) != NULL) {
842 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
845 gsearch.StartFullSearch();
846 while ((bbox = gsearch.NextFullSearch()) != NULL) {
847 SmoothNeighbourTypes(pageseg_mode,
true, bbox);
851 widths_win_ = DisplayGoodBlobs(
"ImprovedStrokewidths", 800, 0);
859 void StrokeWidth::SetNeighbours(
bool leaders,
bool activate_line_trap,
861 int line_trap_count = 0;
862 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
864 line_trap_count += FindGoodNeighbour(bnd, leaders, blob);
866 if (line_trap_count > 0 && activate_line_trap) {
888 tprintf(
"FGN in dir %d for blob:", dir);
891 int top = blob_box.
top();
892 int bottom = blob_box.
bottom();
893 int left = blob_box.
left();
894 int right = blob_box.
right();
895 int width = right - left;
896 int height = top - bottom;
904 int line_trap_count = 0;
907 ? height / 2 : width / 2;
909 ? height / 3 : width / 3;
911 min_good_overlap = min_decent_overlap = 1;
913 int search_pad =
static_cast<int>(
917 TBOX search_box = blob_box;
930 search_box.
set_top(search_box.
top() + search_pad);
937 rectsearch.StartRectSearch(search_box);
939 double best_goodness = 0.0;
940 bool best_is_good =
false;
942 while ((neighbour = rectsearch.NextRectSearch()) != NULL) {
944 if (neighbour == blob)
946 int mid_x = (nbox.
left() + nbox.
right()) / 2;
947 if (mid_x < blob->left_rule() || mid_x > blob->
right_rule())
956 int n_width = nbox.
width();
957 int n_height = nbox.
height();
958 if (
MIN(n_width, n_height) > line_trap_min &&
959 MAX(n_width, n_height) < line_trap_max)
965 MAX(width, height)) &&
970 if (debug)
tprintf(
"Bad size\n");
984 perp_overlap = nbox.
width();
986 perp_overlap = overlap;
989 if (debug)
tprintf(
"On wrong side\n");
996 perp_overlap = nbox.
height();
998 perp_overlap = overlap;
1001 if (debug)
tprintf(
"On wrong side\n");
1006 if (-gap > overlap) {
1007 if (debug)
tprintf(
"Overlaps wrong way\n");
1010 if (perp_overlap < min_decent_overlap) {
1011 if (debug)
tprintf(
"Doesn't overlap enough\n");
1016 bool is_good = overlap >= min_good_overlap && !bad_sizes &&
1023 if (gap < 1) gap = 1;
1024 double goodness = (1.0 + is_good) * overlap / gap;
1026 tprintf(
"goodness = %g vs best of %g, good=%d, overlap=%d, gap=%d\n",
1027 goodness, best_goodness, is_good, overlap, gap);
1029 if (goodness > best_goodness) {
1030 best_neighbour = neighbour;
1031 best_goodness = goodness;
1032 best_is_good = is_good;
1036 return line_trap_count;
1040 static void ListNeighbours(
const BLOBNBOX* blob,
1041 BLOBNBOX_CLIST* neighbours) {
1042 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1045 if (neighbour != NULL) {
1046 neighbours->add_sorted(SortByBoxLeft<BLOBNBOX>,
true, neighbour);
1052 static void List2ndNeighbours(
const BLOBNBOX* blob,
1053 BLOBNBOX_CLIST* neighbours) {
1054 ListNeighbours(blob, neighbours);
1055 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1058 if (neighbour != NULL) {
1059 ListNeighbours(neighbour, neighbours);
1065 static void List3rdNeighbours(
const BLOBNBOX* blob,
1066 BLOBNBOX_CLIST* neighbours) {
1067 List2ndNeighbours(blob, neighbours);
1068 for (
int dir = 0; dir <
BND_COUNT; ++dir) {
1071 if (neighbour != NULL) {
1072 List2ndNeighbours(neighbour, neighbours);
1079 static void CountNeighbourGaps(
bool debug, BLOBNBOX_CLIST* neighbours,
1080 int* pure_h_count,
int* pure_v_count) {
1083 BLOBNBOX_C_IT it(neighbours);
1084 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1086 int h_min, h_max, v_min, v_max;
1089 tprintf(
"Hgaps [%d,%d], vgaps [%d,%d]:", h_min, h_max, v_min, v_max);
1090 if (h_max < v_min ||
1094 if (debug)
tprintf(
"Horz at:");
1095 }
else if (v_max < h_min) {
1098 if (debug)
tprintf(
"Vert at:");
1100 if (debug)
tprintf(
"Neither at:");
1110 void StrokeWidth::SetNeighbourFlows(
BLOBNBOX* blob) {
1116 tprintf(
"SetNeighbourFlows (current flow=%d, type=%d) on:",
1120 BLOBNBOX_CLIST neighbours;
1121 List3rdNeighbours(blob, &neighbours);
1123 int pure_h_count = 0;
1124 int pure_v_count = 0;
1125 CountNeighbourGaps(debug, &neighbours, &pure_h_count, &pure_v_count);
1129 tprintf(
"SetFlows: h_count=%d, v_count=%d\n",
1130 pure_h_count, pure_v_count);
1132 if (!neighbours.empty()) {
1135 if (pure_h_count > 2 * pure_v_count) {
1138 }
else if (pure_v_count > 2 * pure_h_count) {
1151 static void CountNeighbourTypes(BLOBNBOX_CLIST* neighbours,
1152 int* pure_h_count,
int* pure_v_count) {
1153 BLOBNBOX_C_IT it(neighbours);
1154 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1166 void StrokeWidth::SimplifyObviousNeighbours(
BLOBNBOX* blob) {
1188 int h_min, h_max, v_min, v_max;
1190 if ((h_max + margin < v_min && h_max < margin / 2) ||
1195 }
else if (v_max + margin < h_min && v_max < margin / 2) {
1205 void StrokeWidth::SmoothNeighbourTypes(
PageSegMode pageseg_mode,
bool reset_all,
1209 BLOBNBOX_CLIST neighbours;
1210 List2ndNeighbours(blob, &neighbours);
1212 int pure_h_count = 0;
1213 int pure_v_count = 0;
1214 CountNeighbourTypes(&neighbours, &pure_h_count, &pure_v_count);
1219 tprintf(
"pure_h=%d, pure_v=%d\n",
1220 pure_h_count, pure_v_count);
1222 if (pure_h_count > pure_v_count && !FindingVerticalOnly(pageseg_mode)) {
1226 }
else if (pure_v_count > pure_h_count &&
1227 !FindingHorizontalOnly(pageseg_mode)) {
1236 tprintf(
"Clean on pass 3!\n");
1254 TO_BLOCK* block, BLOBNBOX_LIST* diacritic_blobs,
1255 ColPartitionGrid* part_grid, ColPartition_LIST* big_parts,
1257 if (!FindingHorizontalOnly(pageseg_mode)) FindVerticalTextChains(part_grid);
1258 if (!FindingVerticalOnly(pageseg_mode)) FindHorizontalTextChains(part_grid);
1260 chains_win_ =
MakeWindow(0, 400,
"Initial text chains");
1261 part_grid->DisplayBoxes(chains_win_);
1264 if (find_problems) {
1268 part_grid->SplitOverlappingPartitions(big_parts);
1269 EasyMerges(part_grid);
1270 RemoveLargeUnusedBlobs(block, part_grid, big_parts);
1272 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1275 grid_box, rerotation));
1276 int pre_overlap = part_grid->ComputeTotalOverlap(NULL);
1277 TestDiacritics(part_grid, block);
1278 MergeDiacritics(block, part_grid);
1279 if (find_problems && diacritic_blobs != NULL &&
1280 DetectAndRemoveNoise(pre_overlap, grid_box, block, part_grid,
1285 textlines_win_ =
MakeWindow(400, 400,
"GoodTextline blobs");
1286 part_grid->DisplayBoxes(textlines_win_);
1287 diacritics_win_ = DisplayDiacritics(
"Diacritics", 0, 0, block);
1289 PartitionRemainingBlobs(pageseg_mode, part_grid);
1290 part_grid->SplitOverlappingPartitions(big_parts);
1291 EasyMerges(part_grid);
1292 while (part_grid->GridSmoothNeighbours(
BTFT_CHAIN, nontext_map_, grid_box,
1295 grid_box, rerotation));
1298 grid_box, rerotation));
1300 smoothed_win_ =
MakeWindow(800, 400,
"Smoothed blobs");
1301 part_grid->DisplayBoxes(smoothed_win_);
1310 bool StrokeWidth::DetectAndRemoveNoise(
int pre_overlap,
const TBOX& grid_box,
1312 ColPartitionGrid* part_grid,
1313 BLOBNBOX_LIST* diacritic_blobs) {
1314 ColPartitionGrid* noise_grid = NULL;
1315 int post_overlap = part_grid->ComputeTotalOverlap(&noise_grid);
1316 if (pre_overlap == 0) pre_overlap = 1;
1317 BLOBNBOX_IT diacritic_it(diacritic_blobs);
1318 if (noise_grid != NULL) {
1324 noise_grid->DisplayBoxes(noise_win);
1326 part_grid->DeleteNonLeaderParts();
1329 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1337 rsearch.StartRectSearch(search_box);
1338 ColPartition* part = rsearch.NextRectSearch();
1343 diacritic_it.add_after_then_move(blob_it.extract());
1346 noise_grid->DeleteParts();
1350 noise_grid->DeleteParts();
1363 if (next_blob == NULL || next_blob->
owner() != NULL ||
1372 void StrokeWidth::FindVerticalTextChains(ColPartitionGrid* part_grid) {
1378 gsearch.StartFullSearch();
1379 while ((bbox = gsearch.NextFullSearch()) != NULL) {
1384 (blob = MutualUnusedVNeighbour(bbox,
BND_ABOVE)) != NULL) {
1388 while (blob != NULL) {
1390 blob = MutualUnusedVNeighbour(blob,
BND_ABOVE);
1392 blob = MutualUnusedVNeighbour(bbox,
BND_BELOW);
1393 while (blob != NULL) {
1395 blob = MutualUnusedVNeighbour(blob,
BND_BELOW);
1397 CompletePartition(pageseg_mode, part, part_grid);
1409 if (next_blob == NULL || next_blob->
owner() != NULL ||
1418 void StrokeWidth::FindHorizontalTextChains(ColPartitionGrid* part_grid) {
1424 gsearch.StartFullSearch();
1425 while ((bbox = gsearch.NextFullSearch()) != NULL) {
1428 (blob = MutualUnusedHNeighbour(bbox,
BND_RIGHT)) != NULL) {
1432 while (blob != NULL) {
1434 blob = MutualUnusedHNeighbour(blob,
BND_RIGHT);
1436 blob = MutualUnusedHNeighbour(bbox,
BND_LEFT);
1437 while (blob != NULL) {
// Continue the leftward walk with the horizontal-neighbour lookup, the same
// helper this function uses for the seed blob and for the rightward walk.
1439 blob = MutualUnusedHNeighbour(blob,
BND_LEFT);
1441 CompletePartition(pageseg_mode, part, part_grid);
1453 void StrokeWidth::TestDiacritics(ColPartitionGrid* part_grid,
TO_BLOCK* block) {
1456 small_grid.InsertBlobList(&block->
blobs);
1457 int medium_diacritics = 0;
1458 int small_diacritics = 0;
1460 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1463 DiacriticBlob(&small_grid, blob)) {
1467 BLOBNBOX_IT blob_it(&block->
blobs);
1468 for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
1471 small_it.add_to_end(blob_it.extract());
1474 ColPartition* part = blob->
owner();
1475 if (part == NULL && DiacriticBlob(&small_grid, blob)) {
1476 ++medium_diacritics;
1478 small_it.add_to_end(blob_it.extract());
1479 }
else if (part != NULL && !part->block_owned() &&
1480 part->boxes_count() < 3) {
1486 BLOBNBOX_C_IT box_it(part->boxes());
1487 for (box_it.mark_cycle_pt(); !box_it.cycled_list() &&
1488 DiacriticBlob(&small_grid, box_it.data());
1490 if (box_it.cycled_list()) {
1492 while (!box_it.empty()) {
1501 ++medium_diacritics;
1508 small_it.add_to_end(blob_it.extract());
1509 part_grid->RemoveBBox(part);
1514 tprintf(
"Blob not available to be a diacritic at:");
1519 tprintf(
"Found %d small diacritics, %d medium\n",
1520 small_diacritics, medium_diacritics);
1530 bool StrokeWidth::DiacriticBlob(BlobGrid* small_grid,
BLOBNBOX* blob) {
1536 small_box.bottom());
1538 tprintf(
"Testing blob for diacriticness at:");
1541 int x = (small_box.left() + small_box.right()) / 2;
1542 int y = (small_box.bottom() + small_box.top()) / 2;
1545 int height = small_box.height();
1560 int best_total_dist = 0;
1564 TBOX search_box(small_box);
1567 search_box.
pad(x_pad, y_pad);
1569 rsearch.SetUniqueMode(
true);
1571 rsearch.StartRectSearch(search_box);
1573 while ((neighbour = rsearch.NextRectSearch()) != NULL) {
1575 neighbour == blob || neighbour->
owner() == blob->
owner())
1582 tprintf(
"Neighbour not strong enough:");
1587 if (nbox.
height() < min_height) {
1589 tprintf(
"Neighbour not big enough:");
1594 int x_gap = small_box.x_gap(nbox);
1595 int y_gap = small_box.y_gap(nbox);
1599 if (debug)
tprintf(
"xgap=%d, y=%d, total dist=%d\n",
1600 x_gap, y_gap, total_distance);
1601 if (total_distance >
1604 tprintf(
"Neighbour with median size %d too far away:",
1612 tprintf(
"Computing reduced box for :");
1615 int left = small_box.left() - small_box.width();
1616 int right = small_box.right() + small_box.width();
1618 y_gap = small_box.
y_gap(nbox);
1619 if (best_x_overlap == NULL || y_gap < best_y_gap) {
1620 best_x_overlap = neighbour;
1628 tprintf(
"Shrunken box doesn't win:");
1632 if (best_y_overlap == NULL || total_distance < best_total_dist) {
1634 tprintf(
"New best y overlap:");
1637 best_y_overlap = neighbour;
1638 best_total_dist = total_distance;
1640 tprintf(
"New y overlap box doesn't win:");
1644 tprintf(
"Neighbour wrong side of a tab:");
1648 if (best_x_overlap != NULL &&
1649 (best_y_overlap == NULL ||
1654 tprintf(
"DiacriticBlob OK! (x-overlap:");
1660 if (best_y_overlap != NULL &&
1661 DiacriticXGapFilled(small_grid, small_box,
1663 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box())) {
1667 tprintf(
"DiacriticBlob OK! (y-overlap:");
1674 tprintf(
"DiacriticBlob fails:");
1676 tprintf(
"Best x+y gap = %d, y = %d\n", best_total_dist, best_y_gap);
1677 if (best_y_overlap != NULL) {
1678 tprintf(
"XGapFilled=%d, NoiseBetween=%d\n",
1679 DiacriticXGapFilled(small_grid, small_box,
1681 NoNoiseInBetween(small_box, best_y_overlap->
bounding_box()));
1700 bool StrokeWidth::DiacriticXGapFilled(BlobGrid* grid,
1701 const TBOX& diacritic_box,
1702 const TBOX& base_box) {
1706 TBOX occupied_box(base_box);
1708 while ((diacritic_gap = diacritic_box.
x_gap(occupied_box)) > max_gap) {
1709 TBOX search_box(occupied_box);
1710 if (diacritic_box.
left() > search_box.
right()) {
1720 rsearch.StartRectSearch(search_box);
1722 while ((neighbour = rsearch.NextRectSearch()) != NULL) {
1724 if (nbox.
x_gap(diacritic_box) < diacritic_gap) {
1725 if (nbox.
left() < occupied_box.left())
1727 if (nbox.
right() > occupied_box.right())
1728 occupied_box.set_right(nbox.
right());
1732 if (neighbour == NULL)
1739 void StrokeWidth::MergeDiacritics(
TO_BLOCK* block,
1740 ColPartitionGrid* part_grid) {
1742 for (small_it.mark_cycle_pt(); !small_it.cycled_list(); small_it.forward()) {
1748 if (part != NULL && !part->block_owned() && blob->
owner() == NULL &&
1752 part_grid->RemoveBBox(part);
1757 part_grid->InsertBBox(
true,
true, part);
1768 void StrokeWidth::RemoveLargeUnusedBlobs(
TO_BLOCK* block,
1769 ColPartitionGrid* part_grid,
1770 ColPartition_LIST* big_parts) {
1772 for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
1774 ColPartition* big_part = blob->
owner();
1775 if (big_part == NULL) {
1785 void StrokeWidth::PartitionRemainingBlobs(
PageSegMode pageseg_mode,
1786 ColPartitionGrid* part_grid) {
1789 int prev_grid_x = -1;
1790 int prev_grid_y = -1;
1791 BLOBNBOX_CLIST cell_list;
1792 BLOBNBOX_C_IT cell_it(&cell_list);
1793 bool cell_all_noise =
true;
1794 gsearch.StartFullSearch();
1795 while ((bbox = gsearch.NextFullSearch()) != NULL) {
1796 int grid_x = gsearch.GridX();
1797 int grid_y = gsearch.GridY();
1798 if (grid_x != prev_grid_x || grid_y != prev_grid_y) {
1800 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1802 cell_it.set_to_list(&cell_list);
1803 prev_grid_x = grid_x;
1804 prev_grid_y = grid_y;
1805 cell_all_noise =
true;
1807 if (bbox->
owner() == NULL) {
1808 cell_it.add_to_end(bbox);
1810 cell_all_noise =
false;
1812 cell_all_noise =
false;
1815 MakePartitionsFromCellList(pageseg_mode, cell_all_noise, part_grid,
1821 void StrokeWidth::MakePartitionsFromCellList(
PageSegMode pageseg_mode,
1823 ColPartitionGrid* part_grid,
1824 BLOBNBOX_CLIST* cell_list) {
1825 if (cell_list->empty())
1827 BLOBNBOX_C_IT cell_it(cell_list);
1829 BLOBNBOX* bbox = cell_it.extract();
1833 for (cell_it.forward(); !cell_it.empty(); cell_it.forward()) {
1834 part->AddBox(cell_it.extract());
1836 CompletePartition(pageseg_mode, part, part_grid);
1838 for (; !cell_it.empty(); cell_it.forward()) {
1839 BLOBNBOX* bbox = cell_it.extract();
1843 CompletePartition(pageseg_mode, part, part_grid);
1850 void StrokeWidth::CompletePartition(
PageSegMode pageseg_mode,
1852 ColPartitionGrid* part_grid) {
1853 part->ComputeLimits();
1859 if (value > 0 && FindingVerticalOnly(pageseg_mode)) {
1860 value = part->boxes_count() == 1 ? 0 : -2;
1861 }
else if (value < 0 && FindingHorizontalOnly(pageseg_mode)) {
1862 value = part->boxes_count() == 1 ? 0 : 2;
1864 part->SetRegionAndFlowTypesFromProjectionValue(value);
1866 part_grid->InsertBBox(
true,
true, part);
1871 void StrokeWidth::EasyMerges(ColPartitionGrid* part_grid) {
1880 bool StrokeWidth::OrientationSearchBox(ColPartition* part,
TBOX* box) {
1881 if (part->IsVerticalType()) {
1892 bool StrokeWidth::ConfirmEasyMerge(
const ColPartition* p1,
1893 const ColPartition* p2) {
1899 if ((p1->IsVerticalType() || p2->IsVerticalType()) &&
1900 p1->HCoreOverlap(*p2) <= 0 &&
1901 ((!p1->IsSingleton() &&
1902 !p2->IsSingleton()) ||
1903 !p1->bounding_box().major_overlap(p2->bounding_box())))
1905 if ((p1->IsHorizontalType() || p2->IsHorizontalType()) &&
1906 p1->VCoreOverlap(*p2) <= 0 &&
1907 ((!p1->IsSingleton() &&
1908 !p2->IsSingleton()) ||
1909 (!p1->bounding_box().major_overlap(p2->bounding_box()) &&
1910 !p1->OKDiacriticMerge(*p2,
false) &&
1911 !p2->OKDiacriticMerge(*p1,
false))))
1913 if (!p1->ConfirmNoTabViolation(*p2))
1917 return NoNoiseInBetween(p1->bounding_box(), p2->bounding_box());
1921 bool StrokeWidth::NoNoiseInBetween(
const TBOX& box1,
const TBOX& box2)
const {
1929 ScrollView* StrokeWidth::DisplayGoodBlobs(
const char* window_name,
1932 #ifndef GRAPHICS_DISABLED 1939 gsearch.StartFullSearch();
1941 while ((bbox = gsearch.NextFullSearch()) != NULL) {
1943 int left_x = box.
left();
1944 int right_x = box.
right();
1945 int top_y = box.
top();
1946 int bottom_y = box.
bottom();
1957 else if (goodness == 1)
1963 window->
Rectangle(left_x, bottom_y, right_x, top_y);
1971 #ifndef GRAPHICS_DISABLED 1975 int x = (blob_box.
left() + blob_box.
right()) / 2;
1976 window->
Line(x, top, x, bottom);
1977 #endif // GRAPHICS_DISABLED 1981 ScrollView* StrokeWidth::DisplayDiacritics(
const char* window_name,
1984 #ifndef GRAPHICS_DISABLED 1989 BLOBNBOX_IT it(&block->
blobs);
1990 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1994 DrawDiacriticJoiner(blob, window);
2002 for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
2006 DrawDiacriticJoiner(blob, window);
bool leader_on_left() const
tesseract::ColPartition * owner() const
const TBOX & bounding_box() const
BlobGrid(int gridsize, const ICOORD &bleft, const ICOORD &tright)
bool y_overlap(const TBOX &box) const
void ConstructProjection(TO_BLOCK *input_block, const FCOORD &rotation, Pix *nontext_map)
static bool UnMergeableType(BlobRegionType type)
float area_stroke_width() const
void set_leader_on_left(bool flag)
bool overlap(const TBOX &box) const
int IntCastRounded(double x)
bool UniquelyVertical() const
bool IsVerticalType() const
const double kStrokeWidthTolerance
void AddBox(BLOBNBOX *box)
const ICOORD & tright() const
void set_neighbour(BlobNeighbourDir n, BLOBNBOX *neighbour, bool good)
GridSearch< ColPartition, ColPartition_CLIST, ColPartition_C_IT > ColPartitionGridSearch
int textord_tabfind_show_strokewidths
static bool BlankImageInBetween(const TBOX &box1, const TBOX &box2, const TBOX &im_box, const FCOORD &rotation, Pix *pix)
static bool WithinTestRegion(int detail_level, int x, int y)
const int kLineTrapShortest
bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, BLOBNBOX_CLIST *osd_blobs)
_ConstTessMemberResultCallback_0_0< false, R, T1 >::base * NewPermanentTessCallback(const T1 *obj, R(T2::*member)() const)
static bool DifferentSizes(int size1, int size2)
const double kMaxDiacriticGapToBaseCharHeight
const double kMaxDiacriticDistanceRatio
const int kLineResiduePadRatio
StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright)
ScrollView::Color BoxColor() const
const double kBrokenCJKIterationFraction
void Line(int x1, int y1, int x2, int y2)
void set_vert_possible(bool value)
bool DefiniteIndividualFlow()
const int kMostlyOneDirRatio
const double kCJKBrokenDistanceFraction
void RemoveLineResidue(ColPartition_LIST *big_part_list)
BlobTextFlowType flow() const
void set_flow(BlobTextFlowType value)
C_OUTLINE_LIST * out_list()
void really_merge(BLOBNBOX *other)
static bool VeryDifferentSizes(int size1, int size2)
const float kSizeRatioToReject
void PlotGradedBlobs(BLOBNBOX_LIST *blobs, ScrollView *win)
void set_diacritic_box(const TBOX &diacritic_box)
const double kStrokeWidthCJK
void set_horz_possible(bool value)
const double kDiacriticXPadRatio
void rotate_box(FCOORD rotation)
void DeleteUnownedNoise()
const int kMaxCJKSizeRatio
void set_owns_cblob(bool value)
bool leader_on_right() const
int y_gap(const TBOX &box) const
void SetNeighboursOnMediumBlobs(TO_BLOCK *block)
void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid)
void set_x(float xin)
rewrite function
int base_char_top() const
BLOBNBOX_LIST noise_blobs
bool joined_to_prev() const
ScrollView * MakeWindow(int x, int y, const char *window_name)
void RemoveBBox(BLOBNBOX *bbox)
bool contains(const FCOORD pt) const
void StartRadSearch(int x, int y, int max_radius)
const double kDiacriticYPadRatio
const double kLineResidueAspectRatio
BLOBNBOX * neighbour(BlobNeighbourDir n) const
virtual void HandleClick(int x, int y)
BlobNeighbourDir DirOtherWay(BlobNeighbourDir dir)
bool vert_possible() const
virtual void HandleClick(int x, int y)
const double kLineResidueSizeRatio
BLOBNBOX_LIST large_blobs
SVEvent * AwaitEvent(SVEventType type)
const double kMinDiacriticSizeRatio
void set_y(float yin)
rewrite function
int base_char_bottom() const
const double kStrokeWidthFractionTolerance
void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, TO_BLOCK *input_block)
GridSearch< BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT > BlobGridSearch
int x_gap(const TBOX &box) const
void Rectangle(int x1, int y1, int x2, int y2)
int textord_debug_tabfind
const double kNoiseOverlapGrowthFactor
const double kNoiseOverlapAreaFactor
void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, Pix *nontext_pix, const DENORM *denorm, bool cjk_script, TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, ColPartitionGrid *part_grid, ColPartition_LIST *big_parts)
Assume a single column of text of variable sizes.
const double kNeighbourSearchFactor
bool good_stroke_neighbour(BlobNeighbourDir n) const
static ColPartition * MakeBigPartition(BLOBNBOX *box, ColPartition_LIST *big_part_list)
bool UniquelyHorizontal() const
void GridCoords(int x, int y, int *grid_x, int *grid_y) const
bool MatchingStrokeWidth(const BLOBNBOX &other, double fractional_tolerance, double constant_tolerance) const
void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid)
const int kLineTrapLongest
float horz_stroke_width() const
bool ConfirmNoTabViolation(const BLOBNBOX &other) const
void MoveNonTextlineBlobs(BLOBNBOX_LIST *blobs, BLOBNBOX_LIST *small_blobs) const
bool horz_possible() const
BLOBNBOX_LIST small_blobs
void NeighbourGaps(int gaps[BND_COUNT]) const
void compute_bounding_box()
void pad(int xpad, int ypad)
void InsertBBox(bool h_spread, bool v_spread, BBC *bbox)
const ICOORD & bleft() const
void DisplayProjection() const
void InsertBlobList(BLOBNBOX_LIST *blobs)
int EvaluateColPartition(const ColPartition &part, const DENORM *denorm, bool debug) const
const double kCJKAspectRatioIncrease
void set_region_type(BlobRegionType new_type)
float vert_stroke_width() const
const int kCJKMaxComponents
void set_owner(tesseract::ColPartition *new_owner)
void set_base_char_blob(BLOBNBOX *blob)
const double kStrokeWidthFractionCJK
BlobRegionType region_type() const
#define BOOL_VAR(name, val, comment)
bool major_y_overlap(const TBOX &box) const
void Init(int gridsize, const ICOORD &bleft, const ICOORD &tright)
void MinMaxGapsClipped(int *h_min, int *h_max, int *v_min, int *v_max) const
bool textord_tabfind_only_strokewidths
static ScrollView::Color TextlineColor(BlobRegionType region_type, BlobTextFlowType flow_type)
const double kCJKAspectRatio
BLOBNBOX * base_char_blob() const
TBOX BoundsWithinLimits(int left, int right)
#define INT_VAR(name, val, comment)
void set_leader_on_right(bool flag)
int DistanceOfBoxFromBox(const TBOX &from_box, const TBOX &to_box, bool horizontal_textline, const DENORM *denorm, bool debug) const