tesseract  3.05.02
GAPMAP Class Reference

#include <gap_map.h>

Public Member Functions

 GAPMAP (TO_BLOCK *block)
 
 ~GAPMAP ()
 
BOOL8 table_gap (inT16 left, inT16 right)
 

Detailed Description

Definition at line 15 of file gap_map.h.

Constructor & Destructor Documentation

◆ GAPMAP()

GAPMAP::GAPMAP ( TO_BLOCK * block)

Definition at line 35 of file gap_map.cpp.

37  {
38  TO_ROW_IT row_it; //row iterator
39  TO_ROW *row; //current row
40  BLOBNBOX_IT blob_it; //iterator
41  TBOX blob_box;
42  TBOX prev_blob_box;
43  inT16 gap_width;
44  inT16 start_of_row;
45  inT16 end_of_row;
46  STATS xht_stats (0, 128);
47  inT16 min_quantum;
48  inT16 max_quantum;
49  inT16 i;
50 
51  row_it.set_to_list (block->get_rows ());
52  /*
53  Find left and right extremes and bucket size
54  */
55  map = NULL;
56  min_left = MAX_INT16;
57  max_right = -MAX_INT16;
58  total_rows = 0;
59  any_tabs = FALSE;
60  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
61  row = row_it.data ();
62  if (!row->blob_list ()->empty ()) {
63  total_rows++;
64  xht_stats.add ((inT16) floor (row->xheight + 0.5), 1);
65  blob_it.set_to_list (row->blob_list ());
66  start_of_row = blob_it.data ()->bounding_box ().left ();
67  end_of_row = blob_it.data_relative (-1)->bounding_box ().right ();
68  if (min_left > start_of_row)
69  min_left = start_of_row;
70  if (max_right < end_of_row)
71  max_right = end_of_row;
72  }
73  }
74  if ((total_rows < 3) || (min_left >= max_right)) {
75  total_rows = 0;
76  min_left = max_right = 0;
77  return;
78  }
79  bucket_size = (inT16) floor (xht_stats.median () + 0.5) / 2;
80  map_max = (max_right - min_left) / bucket_size;
81  map = (inT16 *) alloc_mem ((map_max + 1) * sizeof (inT16));
82  for (i = 0; i <= map_max; i++)
83  map[i] = 0;
84 
85  for (row_it.mark_cycle_pt (); !row_it.cycled_list (); row_it.forward ()) {
86  row = row_it.data ();
87  if (!row->blob_list ()->empty ()) {
88  blob_it.set_to_list (row->blob_list ());
89  blob_it.mark_cycle_pt ();
90  blob_box = box_next (&blob_it);
91  prev_blob_box = blob_box;
92  if (gapmap_use_ends) {
93  /* Leading space */
94  gap_width = blob_box.left () - min_left;
95  if ((gap_width > gapmap_big_gaps * row->xheight)
96  && gap_width > 2) {
97  max_quantum = (blob_box.left () - min_left) / bucket_size;
98  if (max_quantum > map_max) max_quantum = map_max;
99  for (i = 0; i <= max_quantum; i++)
100  map[i]++;
101  }
102  }
103  while (!blob_it.cycled_list ()) {
104  blob_box = box_next (&blob_it);
105  gap_width = blob_box.left () - prev_blob_box.right ();
106  if ((gap_width > gapmap_big_gaps * row->xheight)
107  && gap_width > 2) {
108  min_quantum =
109  (prev_blob_box.right () - min_left) / bucket_size;
110  max_quantum = (blob_box.left () - min_left) / bucket_size;
111  if (max_quantum > map_max) max_quantum = map_max;
112  for (i = min_quantum; i <= max_quantum; i++)
113  map[i]++;
114  }
115  prev_blob_box = blob_box;
116  }
117  if (gapmap_use_ends) {
118  /* Trailing space */
119  gap_width = max_right - prev_blob_box.right ();
120  if ((gap_width > gapmap_big_gaps * row->xheight)
121  && gap_width > 2) {
122  min_quantum =
123  (prev_blob_box.right () - min_left) / bucket_size;
124  if (min_quantum < 0) min_quantum = 0;
125  for (i = min_quantum; i <= map_max; i++)
126  map[i]++;
127  }
128  }
129  }
130  }
131  for (i = 0; i <= map_max; i++) {
132  if (map[i] > total_rows / 2) {
134  (((i == 0) &&
135  (map[i + 1] <= total_rows / 2)) ||
136  ((i == map_max) &&
137  (map[i - 1] <= total_rows / 2)) ||
138  ((i > 0) &&
139  (i < map_max) &&
140  (map[i - 1] <= total_rows / 2) &&
141  (map[i + 1] <= total_rows / 2)))) {
142  map[i] = 0; //prevent isolated quantum
143  }
144  else
145  any_tabs = TRUE;
146  }
147  }
148  if (gapmap_debug && any_tabs)
149  tprintf ("Table found\n");
150 }
#define TRUE
Definition: capi.h:45
EXTERN bool gapmap_debug
Definition: gap_map.cpp:14
short inT16
Definition: host.h:33
EXTERN bool gapmap_use_ends
Definition: gap_map.cpp:16
BLOBNBOX_LIST * blob_list()
Definition: blobbox.h:595
EXTERN bool gapmap_no_isolated_quanta
Definition: gap_map.cpp:18
#define MAX_INT16
Definition: host.h:52
#define FALSE
Definition: capi.h:46
void * alloc_mem(inT32 count)
Definition: memry.cpp:47
TBOX box_next(BLOBNBOX_IT *it)
Definition: blobbox.cpp:631
EXTERN double gapmap_big_gaps
Definition: gap_map.cpp:19
inT16 left() const
Definition: rect.h:68
TO_ROW_LIST * get_rows()
Definition: blobbox.h:700
#define tprintf(...)
Definition: tprintf.h:31
float xheight
Definition: blobbox.h:653
Definition: rect.h:30
inT16 right() const
Definition: rect.h:75
Definition: statistc.h:33

◆ ~GAPMAP()

GAPMAP::~GAPMAP ( )
inline

Definition at line 21 of file gap_map.h.

21  { //destructor
22  if (map != NULL)
23  free_mem(map);
24  }
void free_mem(void *oldchunk)
Definition: memry.cpp:55

Member Function Documentation

◆ table_gap()

BOOL8 GAPMAP::table_gap ( inT16  left,
inT16  right 
)

Definition at line 159 of file gap_map.cpp.

162  {
163  inT16 min_quantum;
164  inT16 max_quantum;
165  inT16 i;
166  BOOL8 tab_found = FALSE;
167 
168  if (!any_tabs)
169  return FALSE;
170 
171  min_quantum = (left - min_left) / bucket_size;
172  max_quantum = (right - min_left) / bucket_size;
173  // Clip to the bounds of the array. In some circumstances (big blob followed
174  // by small blob) max_quantum can exceed the map_max bounds, but we clip
175  // here instead, as it provides better long-term safety.
176  if (min_quantum < 0) min_quantum = 0;
177  if (max_quantum > map_max) max_quantum = map_max;
178  for (i = min_quantum; (!tab_found && (i <= max_quantum)); i++)
179  if (map[i] > total_rows / 2)
180  tab_found = TRUE;
181  return tab_found;
182 }
#define TRUE
Definition: capi.h:45
short inT16
Definition: host.h:33
unsigned char BOOL8
Definition: host.h:46
#define FALSE
Definition: capi.h:46

The documentation for this class was generated from the following files: