tesseract  3.05.02
beam_search.h
Go to the documentation of this file.
1 /**********************************************************************
2  * File: beam_search.h
3  * Description: Declaration of Beam Word Search Algorithm Class
4  * Author: Ahmad Abdulkader
5  * Created: 2007
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 // The Beam Search class implements a Beam Search algorithm for the
21 // N-best paths through the lattice of a search object using a language model
22 // The search object is a segmented bitmap of a word image. The language model
23 // is a state machine that defines valid sequences of characters
24 // The cost of each path is the combined (product) probabilities of the
25 // characters along the path. The character probabilities are computed using
26 // the character classifier member of the RecoContext
27 // The BeamSearch class itself holds the state of the last search it performed
28 // using its "Search" method. Subsequent class to the Search method erase the
29 // states of previously done searches
30 
31 #ifndef BEAM_SEARCH_H
32 #define BEAM_SEARCH_H
33 
34 #include "search_column.h"
35 #include "word_altlist.h"
36 #include "search_object.h"
37 #include "lang_model.h"
38 #include "cube_utils.h"
39 #include "cube_reco_context.h"
40 #include "allheaders.h"
41 
42 namespace tesseract {
43 
44 class BeamSearch {
45  public:
46  explicit BeamSearch(CubeRecoContext *cntxt, bool word_mode = true);
47  ~BeamSearch();
48  // Performs a beam search in the specified search using the specified
49  // language model; returns an alternate list of possible words as a result.
50  WordAltList *Search(SearchObject *srch_obj, LangModel *lang_mod = NULL);
51  // Returns the best node in the last column of last performed search.
52  SearchNode *BestNode() const;
53  // Returns the string corresponding to the specified alt.
54  char_32 *Alt(int alt) const;
55  // Backtracks from the specified lattice node and returns the corresponding
56  // character-mapped segments, character count, char_32 result string, and
57  // character bounding boxes (if char_boxes is not NULL). If the segments
58  // cannot be constructed, returns NULL, and all result arguments
59  // will be NULL.
60  CharSamp **BackTrack(SearchObject *srch_obj, int node_index,
61  int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
62  // Same as above, except it takes a pointer to a search node object
63  // instead of node index.
64  CharSamp **BackTrack(SearchObject *srch_obj, SearchNode *node,
65  int *char_cnt, char_32 **str32, Boxa **char_boxes) const;
66  // Returns the size cost of a specified string of a lattice
67  // path that ends at the specified lattice node.
68  int SizeCost(SearchObject *srch_obj, SearchNode *node,
69  char_32 **str32 = NULL) const;
70  // Returns the word unigram cost of the given string, possibly
71  // stripping out a single trailing punctuation character.
72  int WordUnigramCost(char_32 *str32, WordUnigrams* word_unigrams) const;
73 
74  // Supplementary functions needed for visualization
75  // Return column count of the lattice.
76  inline int ColCnt() const { return col_cnt_; }
77  // Returns the lattice column corresponding to the specified column index.
78  SearchColumn *Column(int col_idx) const;
79  // Return the index of the best node in the last column of the
80  // best-cost path before the alternates list is sorted.
81  inline int BestPresortedNodeIndex() const {
82  return best_presorted_node_idx_;
83  }
84 
85  private:
86  // Maximum reasonable segmentation point count
87  static const int kMaxSegPointCnt = 128;
88  // Recognition context object; the context holds the character classifier
89  // and the tuning parameters object
90  CubeRecoContext *cntxt_;
91  // Count of segmentation pts
92  int seg_pt_cnt_;
93  // Lattice column count; currently redundant with respect to seg_pt_cnt_
94  // but that might change in the future
95  int col_cnt_;
96  // Array of lattice columns
97  SearchColumn **col_;
98  // Run in word or phrase mode
99  bool word_mode_;
100  // Node index of best-cost node, before alternates are merged and sorted
101  int best_presorted_node_idx_;
102  // Cleans up beam search state
103  void Cleanup();
104  // Creates a Word alternate list from the results in the lattice.
105  // This function computes a cost for each node in the final column
106  // of the lattice, which is a weighted average of several costs:
107  // size cost, character bigram cost, word unigram cost, and
108  // recognition cost from the beam search. The weights are the
109  // CubeTuningParams, which are learned together with the character
110  // classifiers.
111  WordAltList *CreateWordAltList(SearchObject *srch_obj);
112  // Creates a set of children nodes emerging from a parent node based on
113  // the character alternate list and the language model.
114  void CreateChildren(SearchColumn *out_col, LangModel *lang_mod,
115  SearchNode *parent_node, LangModEdge *lm_parent_edge,
116  CharAltList *char_alt_list, int extra_cost);
117  // Backtracks from the given lattice node and returns the corresponding
118  // char mapped segments, character count, and character bounding boxes (if
119  // char_boxes is not NULL). If the segments cannot be constructed,
120  // returns NULL, and all result arguments will be NULL.
121  CharSamp **SplitByNode(SearchObject *srch_obj, SearchNode *srch_node,
122  int* char_cnt, Boxa **char_boxes) const;
123 };
124 }
125 
126 #endif // BEAM_SEARCH_H
WordAltList * Search(SearchObject *srch_obj, LangModel *lang_mod=NULL)
Definition: beam_search.cpp:97
SearchNode * BestNode() const
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
SearchColumn * Column(int col_idx) const
int BestPresortedNodeIndex() const
Definition: beam_search.h:81
int SizeCost(SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
signed int char_32
Definition: string_32.h:40
BeamSearch(CubeRecoContext *cntxt, bool word_mode=true)
Definition: beam_search.cpp:27
char_32 * Alt(int alt) const
int ColCnt() const
Definition: beam_search.h:76
int WordUnigramCost(char_32 *str32, WordUnigrams *word_unigrams) const