tesseract  3.05.02
search_node.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: search_node.cpp
3  * Description: Implementation of the Beam Search Node Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "search_node.h"
21 
22 namespace tesseract {
23 
24 // The constructor updates the best paths and costs:
25 // mean_char_reco_cost_ (returned by BestRecoCost()) is the mean
26 // char_reco cost of the best_path, including this node.
27 // best_path_reco_cost is the total char_reco_cost of the best_path,
28 // but excludes the char_reco_cost of this node.
29 // best_cost is the mean mixed cost, i.e., mean_char_reco_cost_ +
30 // current language model cost, all weighted by the cube context's
31 // RecoWgt parameter
33  int char_reco_cost, LangModEdge *edge, int col_idx) {
34  // copy data members
35  cntxt_ = cntxt;
36  lang_mod_edge_ = edge;
37  col_idx_ = col_idx;
38  parent_node_ = parent_node;
39  char_reco_cost_ = char_reco_cost;
40 
41  // the string of this node is the same as that of the language model edge
42  str_ = (edge == NULL ? NULL : edge->EdgeString());
43 
44  // compute best path total reco cost
45  best_path_reco_cost_ = (parent_node_ == NULL) ? 0 :
46  parent_node_->CharRecoCost() + parent_node_->BestPathRecoCost();
47 
48  // update best path length
49  best_path_len_ = (parent_node_ == NULL) ?
50  1 : parent_node_->BestPathLength() + 1;
51  if (edge != NULL && edge->IsRoot() && parent_node_ != NULL) {
52  best_path_len_++;
53  }
54 
55  // compute best reco cost mean cost
56  mean_char_reco_cost_ = static_cast<int>(
57  (best_path_reco_cost_ + char_reco_cost_) /
58  static_cast<double>(best_path_len_));
59 
60  // get language model cost
61  int lm_cost = LangModCost(lang_mod_edge_, parent_node_);
62 
63  // compute aggregate best cost
64  best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
65  (best_path_reco_cost_ + char_reco_cost_) /
66  static_cast<double>(best_path_len_)
67  ) + lm_cost;
68 }
69 
71  if (lang_mod_edge_ != NULL) {
72  delete lang_mod_edge_;
73  }
74 }
75 
76 // update the parent_node node if provides a better (less) cost
77 bool SearchNode::UpdateParent(SearchNode *new_parent, int new_reco_cost,
78  LangModEdge *new_edge) {
79  if (lang_mod_edge_ == NULL) {
80  if (new_edge != NULL) {
81  return false;
82  }
83  } else {
84  // to update the parent_node, we have to have the same target
85  // state and char
86  if (new_edge == NULL || !lang_mod_edge_->IsIdentical(new_edge) ||
87  !SearchNode::IdenticalPath(parent_node_, new_parent)) {
88  return false;
89  }
90  }
91 
92  // compute the path cost and combined cost of the new path
93  int new_best_path_reco_cost;
94  int new_cost;
95  int new_best_path_len;
96 
97  new_best_path_reco_cost = (new_parent == NULL) ?
98  0 : new_parent->BestPathRecoCost() + new_parent->CharRecoCost();
99 
100  new_best_path_len =
101  (new_parent == NULL) ? 1 : new_parent->BestPathLength() + 1;
102 
103  // compute the new language model cost
104  int new_lm_cost = LangModCost(new_edge, new_parent);
105 
106  new_cost = static_cast<int>(cntxt_->Params()->RecoWgt() *
107  (new_best_path_reco_cost + new_reco_cost) /
108  static_cast<double>(new_best_path_len)
109  ) + new_lm_cost;
110 
111  // update if it is better (less) than the current one
112  if (best_cost_ > new_cost) {
113  parent_node_ = new_parent;
114  char_reco_cost_ = new_reco_cost;
115  best_path_reco_cost_ = new_best_path_reco_cost;
116  best_path_len_ = new_best_path_len;
117  mean_char_reco_cost_ = static_cast<int>(
118  (best_path_reco_cost_ + char_reco_cost_) /
119  static_cast<double>(best_path_len_));
120  best_cost_ = static_cast<int>(cntxt_->Params()->RecoWgt() *
121  (best_path_reco_cost_ + char_reco_cost_) /
122  static_cast<double>(best_path_len_)
123  ) + new_lm_cost;
124  return true;
125  }
126  return false;
127 }
128 
130  SearchNode *node = this;
131 
132  // compute string length
133  int len = 0;
134 
135  while (node != NULL) {
136  if (node->str_ != NULL) {
137  len += CubeUtils::StrLen(node->str_);
138  }
139 
140  // if the edge is a root and does not have a NULL parent, account for space
141  LangModEdge *lm_edge = node->LangModelEdge();
142  if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
143  len++;
144  }
145 
146  node = node->parent_node_;
147  }
148 
149  char_32 *char_ptr = new char_32[len + 1];
150 
151  int ch_idx = len;
152 
153  node = this;
154  char_ptr[ch_idx--] = 0;
155 
156  while (node != NULL) {
157  int str_len = ((node->str_ == NULL) ? 0 : CubeUtils::StrLen(node->str_));
158  while (str_len > 0) {
159  char_ptr[ch_idx--] = node->str_[--str_len];
160  }
161 
162  // if the edge is a root and does not have a NULL parent, insert a space
163  LangModEdge *lm_edge = node->LangModelEdge();
164  if (lm_edge != NULL && lm_edge->IsRoot() && node->ParentNode() != NULL) {
165  char_ptr[ch_idx--] = (char_32)' ';
166  }
167 
168  node = node->parent_node_;
169  }
170 
171  return char_ptr;
172 }
173 
174 // compares the path of two nodes and checks if its identical
176  if (node1 != NULL && node2 != NULL &&
177  node1->best_path_len_ != node2->best_path_len_) {
178  return false;
179  }
180 
181  // backtrack until either a root or a NULL edge is reached
182  while (node1 != NULL && node2 != NULL) {
183  if (node1->str_ != node2->str_) {
184  return false;
185  }
186 
187  // stop if either nodes is a root
188  if (node1->LangModelEdge()->IsRoot() || node2->LangModelEdge()->IsRoot()) {
189  break;
190  }
191 
192  node1 = node1->parent_node_;
193  node2 = node2->parent_node_;
194  }
195 
196  return ((node1 == NULL && node2 == NULL) ||
197  (node1 != NULL && node1->LangModelEdge()->IsRoot() &&
198  node2 != NULL && node2->LangModelEdge()->IsRoot()));
199 }
200 
201 // Computes the language model cost of a path
202 int SearchNode::LangModCost(LangModEdge *current_lm_edge,
203  SearchNode *parent_node) {
204  int lm_cost = 0;
205  int node_cnt = 0;
206 
207  do {
208  // check if root
209  bool is_root = ((current_lm_edge != NULL && current_lm_edge->IsRoot()) ||
210  parent_node == NULL);
211  if (is_root) {
212  node_cnt++;
213  lm_cost += (current_lm_edge == NULL ? 0 : current_lm_edge->PathCost());
214  }
215 
216  // continue until we hit a null parent
217  if (parent_node == NULL) {
218  break;
219  }
220 
221  // get the previous language model edge
222  current_lm_edge = parent_node->LangModelEdge();
223  // back track
224  parent_node = parent_node->ParentNode();
225  } while (true);
226 
227  return static_cast<int>(lm_cost / static_cast<double>(node_cnt));
228 }
229 } // namespace tesseract
bool UpdateParent(SearchNode *new_parent, int new_reco_cost, LangModEdge *new_edge)
Definition: search_node.cpp:77
static bool IdenticalPath(SearchNode *node1, SearchNode *node2)
SearchNode * ParentNode()
Definition: search_node.h:69
double RecoWgt() const
Definition: tuning_params.h:48
virtual const char_32 * EdgeString() const =0
virtual bool IsIdentical(LangModEdge *edge) const =0
LangModEdge * LangModelEdge()
Definition: search_node.h:70
TuningParams * Params() const
static int StrLen(const char_32 *str)
Definition: cube_utils.cpp:54
virtual int PathCost() const =0
signed int char_32
Definition: string_32.h:40
virtual bool IsRoot() const =0
SearchNode(CubeRecoContext *cntxt, SearchNode *parent_node, int char_reco_cost, LangModEdge *edge, int col_idx)
Definition: search_node.cpp:32