tesseract  3.05.02
tesseract::BeamSearch Class Reference

#include <beam_search.h>

Public Member Functions

 BeamSearch (CubeRecoContext *cntxt, bool word_mode=true)
 
 ~BeamSearch ()
 
WordAltListSearch (SearchObject *srch_obj, LangModel *lang_mod=NULL)
 
SearchNodeBestNode () const
 
char_32Alt (int alt) const
 
CharSamp ** BackTrack (SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
 
CharSamp ** BackTrack (SearchObject *srch_obj, SearchNode *node, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
 
int SizeCost (SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
 
int WordUnigramCost (char_32 *str32, WordUnigrams *word_unigrams) const
 
int ColCnt () const
 
SearchColumnColumn (int col_idx) const
 
int BestPresortedNodeIndex () const
 

Detailed Description

Definition at line 44 of file beam_search.h.

Constructor & Destructor Documentation

◆ BeamSearch()

tesseract::BeamSearch::BeamSearch ( CubeRecoContext cntxt,
bool  word_mode = true 
)
explicit

Definition at line 27 of file beam_search.cpp.

27  {
28  cntxt_ = cntxt;
29  seg_pt_cnt_ = 0;
30  col_cnt_ = 1;
31  col_ = NULL;
32  word_mode_ = word_mode;
33 }

◆ ~BeamSearch()

tesseract::BeamSearch::~BeamSearch ( )

Definition at line 46 of file beam_search.cpp.

46  {
47  Cleanup();
48 }

Member Function Documentation

◆ Alt()

char_32 * tesseract::BeamSearch::Alt ( int  alt) const

Definition at line 287 of file beam_search.cpp.

287  {
288  // get the last column of the lattice
289  if (col_cnt_ <= 0)
290  return NULL;
291 
292  SearchColumn *srch_col = col_[col_cnt_ - 1];
293  if (!srch_col)
294  return NULL;
295 
296  // point to the last node in the selected path
297  if (alt >= srch_col->NodeCount() || srch_col->Nodes() == NULL) {
298  return NULL;
299  }
300 
301  SearchNode *srch_node = srch_col->Nodes()[alt];
302  if (!srch_node)
303  return NULL;
304 
305  // get string
306  char_32 *str32 = srch_node->PathString();
307  if (!str32)
308  return NULL;
309 
310  return str32;
311 }
SearchNode ** Nodes() const
Definition: search_column.h:44
signed int char_32
Definition: string_32.h:40

◆ BackTrack() [1/2]

CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
int  node_index,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 317 of file beam_search.cpp.

319  {
320  // get the last column of the lattice
321  if (col_cnt_ <= 0)
322  return NULL;
323  SearchColumn *srch_col = col_[col_cnt_ - 1];
324  if (!srch_col)
325  return NULL;
326 
327  // point to the last node in the selected path
328  if (node_index >= srch_col->NodeCount() || !srch_col->Nodes())
329  return NULL;
330 
331  SearchNode *srch_node = srch_col->Nodes()[node_index];
332  if (!srch_node)
333  return NULL;
334  return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
335 }
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
SearchNode ** Nodes() const
Definition: search_column.h:44

◆ BackTrack() [2/2]

CharSamp ** tesseract::BeamSearch::BackTrack ( SearchObject srch_obj,
SearchNode node,
int *  char_cnt,
char_32 **  str32,
Boxa **  char_boxes 
) const

Definition at line 341 of file beam_search.cpp.

343  {
344  if (!srch_node)
345  return NULL;
346 
347  if (str32) {
348  delete [](*str32); // clear existing value
349  *str32 = srch_node->PathString();
350  if (!*str32)
351  return NULL;
352  }
353 
354  if (char_boxes && *char_boxes) {
355  boxaDestroy(char_boxes); // clear existing value
356  }
357 
358  CharSamp **chars;
359  chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
360  if (!chars && str32)
361  delete []*str32;
362  return chars;
363 }

◆ BestNode()

SearchNode * tesseract::BeamSearch::BestNode ( ) const

Definition at line 275 of file beam_search.cpp.

275  {
276  if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
277  return NULL;
278 
279  int node_cnt = col_[col_cnt_ - 1]->NodeCount();
280  SearchNode **srch_nodes = col_[col_cnt_ - 1]->Nodes();
281  if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
282  return NULL;
283  return srch_nodes[0];
284 }
SearchNode ** Nodes() const
Definition: search_column.h:44

◆ BestPresortedNodeIndex()

int tesseract::BeamSearch::BestPresortedNodeIndex ( ) const
inline

Definition at line 81 of file beam_search.h.

81  {
82  return best_presorted_node_idx_;
83  }

◆ ColCnt()

int tesseract::BeamSearch::ColCnt ( ) const
inline

Definition at line 76 of file beam_search.h.

76 { return col_cnt_; }

◆ Column()

SearchColumn * tesseract::BeamSearch::Column ( int  col_idx) const

Definition at line 268 of file beam_search.cpp.

268  {
269  if (col < 0 || col >= col_cnt_ || !col_)
270  return NULL;
271  return col_[col];
272 }

◆ Search()

WordAltList * tesseract::BeamSearch::Search ( SearchObject srch_obj,
LangModel lang_mod = NULL 
)

Definition at line 97 of file beam_search.cpp.

97  {
98  // verifications
99  if (!lang_mod)
100  lang_mod = cntxt_->LangMod();
101  if (!lang_mod) {
102  fprintf(stderr, "Cube ERROR (BeamSearch::Search): could not construct "
103  "LangModel\n");
104  return NULL;
105  }
106 
107  // free existing state
108  Cleanup();
109 
110  // get seg pt count
111  seg_pt_cnt_ = srch_obj->SegPtCnt();
112  if (seg_pt_cnt_ < 0) {
113  return NULL;
114  }
115  col_cnt_ = seg_pt_cnt_ + 1;
116 
117  // disregard suspicious cases
118  if (seg_pt_cnt_ > 128) {
119  fprintf(stderr, "Cube ERROR (BeamSearch::Search): segment point count is "
120  "suspiciously high; bailing out\n");
121  return NULL;
122  }
123 
124  // alloc memory for columns
125  col_ = new SearchColumn *[col_cnt_];
126  memset(col_, 0, col_cnt_ * sizeof(*col_));
127 
128  // for all possible segments
129  for (int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
130  // create a search column
131  col_[end_seg - 1] = new SearchColumn(end_seg - 1,
132  cntxt_->Params()->BeamWidth());
133 
134  // for all possible start segments
135  int init_seg = MAX(0, end_seg - cntxt_->Params()->MaxSegPerChar());
136  for (int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
137  int parent_nodes_cnt;
138  SearchNode **parent_nodes;
139 
140  // for the root segment, we do not have a parent
141  if (strt_seg == 0) {
142  parent_nodes_cnt = 1;
143  parent_nodes = NULL;
144  } else {
145  // for all the existing nodes in the starting column
146  parent_nodes_cnt = col_[strt_seg - 1]->NodeCount();
147  parent_nodes = col_[strt_seg - 1]->Nodes();
148  }
149 
150  // run the shape recognizer
151  CharAltList *char_alt_list = srch_obj->RecognizeSegment(strt_seg - 1,
152  end_seg - 1);
153  // for all the possible parents
154  for (int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
155  // point to the parent node
156  SearchNode *parent_node = !parent_nodes ? NULL
157  : parent_nodes[parent_idx];
158  LangModEdge *lm_parent_edge = !parent_node ? lang_mod->Root()
159  : parent_node->LangModelEdge();
160 
161  // compute the cost of not having spaces within the segment range
162  int contig_cost = srch_obj->NoSpaceCost(strt_seg - 1, end_seg - 1);
163 
164  // In phrase mode, compute the cost of not having a space before
165  // this character
166  int no_space_cost = 0;
167  if (!word_mode_ && strt_seg > 0) {
168  no_space_cost = srch_obj->NoSpaceCost(strt_seg - 1);
169  }
170 
171  // if the no space cost is low enough
172  if ((contig_cost + no_space_cost) < MIN_PROB_COST) {
173  // Add the children nodes
174  CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
175  lm_parent_edge, char_alt_list,
176  contig_cost + no_space_cost);
177  }
178 
179  // In phrase mode and if not starting at the root
180  if (!word_mode_ && strt_seg > 0) { // parent_node must be non-NULL
181  // consider starting a new word for nodes that are valid EOW
182  if (parent_node->LangModelEdge()->IsEOW()) {
183  // get the space cost
184  int space_cost = srch_obj->SpaceCost(strt_seg - 1);
185  // if the space cost is low enough
186  if ((contig_cost + space_cost) < MIN_PROB_COST) {
187  // Restart the language model and add nodes as children to the
188  // space node.
189  CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
190  char_alt_list, contig_cost + space_cost);
191  }
192  }
193  }
194  } // parent
195  } // strt_seg
196 
197  // prune the column nodes
198  col_[end_seg - 1]->Prune();
199 
200  // Free the column hash table. No longer needed
201  col_[end_seg - 1]->FreeHashTable();
202  } // end_seg
203 
204  WordAltList *alt_list = CreateWordAltList(srch_obj);
205  return alt_list;
206 }
SearchNode ** Nodes() const
Definition: search_column.h:44
LangModel * LangMod() const
LangModEdge * LangModelEdge()
Definition: search_node.h:70
TuningParams * Params() const
#define MAX(x, y)
Definition: ndminx.h:24
#define MIN_PROB_COST
Definition: cube_const.h:26

◆ SizeCost()

int tesseract::BeamSearch::SizeCost ( SearchObject srch_obj,
SearchNode node,
char_32 **  str32 = NULL 
) const

Definition at line 455 of file beam_search.cpp.

456  {
457  CharSamp **chars = NULL;
458  int char_cnt = 0;
459  if (!node)
460  return 0;
461  // Backtrack to get string and character segmentation
462  chars = BackTrack(srch_obj, node, &char_cnt, str32, NULL);
463  if (!chars)
464  return WORST_COST;
465  int size_cost = (cntxt_->SizeModel() == NULL) ? 0 :
466  cntxt_->SizeModel()->Cost(chars, char_cnt);
467  delete []chars;
468  return size_cost;
469 }
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
#define WORST_COST
Definition: cube_const.h:30
int Cost(CharSamp **samp_array, int samp_cnt) const
WordSizeModel * SizeModel() const

◆ WordUnigramCost()

int tesseract::BeamSearch::WordUnigramCost ( char_32 str32,
WordUnigrams word_unigrams 
) const

The documentation for this class was generated from the following files: