tesseract  3.05.02
ocrblock.cpp File Reference
#include <stdlib.h>
#include "blckerr.h"
#include "ocrblock.h"
#include "stepblob.h"
#include "tprintf.h"

Go to the source code of this file.

Macros

#define BLOCK_LABEL_HEIGHT   150
 
#define ROW_SPACING   5
 

Functions

int decreasing_top_order (const void *row1, const void *row2)
 
bool LeftMargin (ICOORDELT_LIST *segments, int x, int *margin)
 
bool RightMargin (ICOORDELT_LIST *segments, int x, int *margin)
 
void PrintSegmentationStats (BLOCK_LIST *block_list)
 
void ExtractBlobsFromSegmentation (BLOCK_LIST *blocks, C_BLOB_LIST *output_blob_list)
 
void RefreshWordBlobsFromNewBlobs (BLOCK_LIST *block_list, C_BLOB_LIST *new_blobs, C_BLOB_LIST *not_found_blobs)
 

Macro Definition Documentation

◆ BLOCK_LABEL_HEIGHT

#define BLOCK_LABEL_HEIGHT   150

Definition at line 26 of file ocrblock.cpp.

◆ ROW_SPACING

#define ROW_SPACING   5

Function Documentation

◆ decreasing_top_order()

int decreasing_top_order ( const void *  row1,
const void *  row2 
)

decreasing_top_order

Sort comparator: returns <0 if row1 top > row2 top, so that rows are ordered by decreasing top.

Definition at line 71 of file ocrblock.cpp.

73  {
74  return (*(ROW **) row2)->bounding_box ().top () -
75  (*(ROW **) row1)->bounding_box ().top ();
76 }
Definition: ocrrow.h:32

◆ ExtractBlobsFromSegmentation()

void ExtractBlobsFromSegmentation ( BLOCK_LIST *  blocks,
C_BLOB_LIST *  output_blob_list 
)

Definition at line 444 of file ocrblock.cpp.

445  {
446  C_BLOB_IT return_list_it(output_blob_list);
447  BLOCK_IT block_it(blocks);
448  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
449  BLOCK* block = block_it.data();
450  ROW_IT row_it(block->row_list());
451  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
452  ROW* row = row_it.data();
453  // Iterate over all werds in the row.
454  WERD_IT werd_it(row->word_list());
455  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
456  WERD* werd = werd_it.data();
457  return_list_it.move_to_last();
458  return_list_it.add_list_after(werd->cblob_list());
459  return_list_it.move_to_last();
460  return_list_it.add_list_after(werd->rej_cblob_list());
461  }
462  }
463  }
464 }
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: werd.h:60
Definition: ocrrow.h:32
Definition: ocrblock.h:30
WERD_LIST * word_list()
Definition: ocrrow.h:52
C_BLOB_LIST * cblob_list()
Definition: werd.h:100
C_BLOB_LIST * rej_cblob_list()
Definition: werd.h:95

◆ LeftMargin()

bool LeftMargin ( ICOORDELT_LIST *  segments,
int  x,
int *  margin 
)

Definition at line 255 of file ocrblock.cpp.

255  {
256  bool found = false;
257  *margin = 0;
258  if (segments->empty())
259  return found;
260  ICOORDELT_IT seg_it(segments);
261  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
262  int cur_margin = x - seg_it.data()->x();
263  if (cur_margin >= 0) {
264  if (!found) {
265  *margin = cur_margin;
266  } else if (cur_margin < *margin) {
267  *margin = cur_margin;
268  }
269  found = true;
270  }
271  }
272  return found;
273 }

◆ PrintSegmentationStats()

void PrintSegmentationStats ( BLOCK_LIST *  block_list)

Definition at line 411 of file ocrblock.cpp.

411  {
412  int num_blocks = 0;
413  int num_rows = 0;
414  int num_words = 0;
415  int num_blobs = 0;
416  BLOCK_IT block_it(block_list);
417  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
418  BLOCK* block = block_it.data();
419  ++num_blocks;
420  ROW_IT row_it(block->row_list());
421  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
422  ++num_rows;
423  ROW* row = row_it.data();
424  // Iterate over all werds in the row.
425  WERD_IT werd_it(row->word_list());
426  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
427  WERD* werd = werd_it.data();
428  ++num_words;
429  num_blobs += werd->cblob_list()->length();
430  }
431  }
432  }
433  tprintf("Block list stats:\nBlocks = %d\nRows = %d\nWords = %d\nBlobs = %d\n",
434  num_blocks, num_rows, num_words, num_blobs);
435 }
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
Definition: werd.h:60
#define tprintf(...)
Definition: tprintf.h:31
Definition: ocrrow.h:32
Definition: ocrblock.h:30
WERD_LIST * word_list()
Definition: ocrrow.h:52
C_BLOB_LIST * cblob_list()
Definition: werd.h:100

◆ RefreshWordBlobsFromNewBlobs()

void RefreshWordBlobsFromNewBlobs ( BLOCK_LIST *  block_list,
C_BLOB_LIST *  new_blobs,
C_BLOB_LIST *  not_found_blobs 
)

Definition at line 479 of file ocrblock.cpp.

481  {
482  // Now iterate over all the werds in block_list, and just
483  // replace the corresponding c-blobs inside the werds.
484  BLOCK_IT block_it(block_list);
485  for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
486  BLOCK* block = block_it.data();
487  if (block->poly_block() != NULL && !block->poly_block()->IsText())
488  continue; // Don't touch non-text blocks.
489  // Iterate over all rows in the block.
490  ROW_IT row_it(block->row_list());
491  for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
492  ROW* row = row_it.data();
493  // Iterate over all werds in the row.
494  WERD_IT werd_it(row->word_list());
495  WERD_LIST new_words;
496  WERD_IT new_words_it(&new_words);
497  for (werd_it.mark_cycle_pt(); !werd_it.cycled_list(); werd_it.forward()) {
498  WERD* werd = werd_it.extract();
499  WERD* new_werd = werd->ConstructWerdWithNewBlobs(new_blobs,
500  not_found_blobs);
501  if (new_werd) {
502  // Insert this new werd into the actual row's werd-list. Remove the
503  // existing one.
504  new_words_it.add_after_then_move(new_werd);
505  delete werd;
506  } else {
507  // Reinsert the older word back, for lack of better options.
508  // This is critical since dropping the words messes up segmentation:
509  // eg. 1st word in the row might otherwise have W_FUZZY_NON turned on.
510  new_words_it.add_after_then_move(werd);
511  }
512  }
513  // Get rid of the old word list & replace it with the new one.
514  row->word_list()->clear();
515  werd_it.move_to_first();
516  werd_it.add_list_after(&new_words);
517  }
518  }
519 }
ROW_LIST * row_list()
get rows
Definition: ocrblock.h:120
POLY_BLOCK * poly_block() const
Definition: pdblock.h:55
Definition: werd.h:60
Definition: ocrrow.h:32
bool IsText() const
Definition: polyblk.h:52
Definition: ocrblock.h:30
WERD * ConstructWerdWithNewBlobs(C_BLOB_LIST *all_blobs, C_BLOB_LIST *orphan_blobs)
Definition: werd.cpp:412
WERD_LIST * word_list()
Definition: ocrrow.h:52

◆ RightMargin()

bool RightMargin ( ICOORDELT_LIST *  segments,
int  x,
int *  margin 
)

Definition at line 285 of file ocrblock.cpp.

285  {
286  bool found = false;
287  *margin = 0;
288  if (segments->empty())
289  return found;
290  ICOORDELT_IT seg_it(segments);
291  for (seg_it.mark_cycle_pt(); !seg_it.cycled_list(); seg_it.forward()) {
292  int cur_margin = seg_it.data()->x() + seg_it.data()->y() - x;
293  if (cur_margin >= 0) {
294  if (!found) {
295  *margin = cur_margin;
296  } else if (cur_margin < *margin) {
297  *margin = cur_margin;
298  }
299  found = true;
300  }
301  }
302  return found;
303 }