tesseract  3.05.02
search_column.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: search_column.cpp
3  * Description: Implementation of the Beam Search Column Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "search_column.h"
21 #include <stdlib.h>
22 
23 namespace tesseract {
24 
25 SearchColumn::SearchColumn(int col_idx, int max_node) {
26  col_idx_ = col_idx;
27  node_cnt_ = 0;
28  node_array_ = NULL;
29  max_node_cnt_ = max_node;
30  node_hash_table_ = NULL;
31  init_ = false;
32  min_cost_ = INT_MAX;
33  max_cost_ = 0;
34 }
35 
36 // Cleanup data
37 void SearchColumn::Cleanup() {
38  if (node_array_ != NULL) {
39  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
40  if (node_array_[node_idx] != NULL) {
41  delete node_array_[node_idx];
42  }
43  }
44 
45  delete []node_array_;
46  node_array_ = NULL;
47  }
48  FreeHashTable();
49  init_ = false;
50 }
51 
53  Cleanup();
54 }
55 
56 // Initializations
57 bool SearchColumn::Init() {
58  if (init_ == true) {
59  return true;
60  }
61 
62  // create hash table
63  if (node_hash_table_ == NULL) {
64  node_hash_table_ = new SearchNodeHashTable();
65  }
66 
67  init_ = true;
68 
69  return true;
70 }
71 
72 // Prune the nodes if necessary. Pruning is done such that a max
73 // number of nodes is kept, i.e., the beam width
75  // no need to prune
76  if (node_cnt_ <= max_node_cnt_) {
77  return;
78  }
79 
80  // compute the cost histogram
81  memset(score_bins_, 0, sizeof(score_bins_));
82  int cost_range = max_cost_ - min_cost_ + 1;
83  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
84  int cost_bin = static_cast<int>(
85  ((node_array_[node_idx]->BestCost() - min_cost_) *
86  kScoreBins) / static_cast<double>(cost_range));
87  if (cost_bin >= kScoreBins) {
88  cost_bin = kScoreBins - 1;
89  }
90  score_bins_[cost_bin]++;
91  }
92 
93  // determine the pruning cost by scanning the cost histogram from
94  // least to greatest cost bins and finding the cost at which the
95  // max number of nodes is exceeded
96  int pruning_cost = 0;
97  int new_node_cnt = 0;
98  for (int cost_bin = 0; cost_bin < kScoreBins; cost_bin++) {
99  if (new_node_cnt > 0 &&
100  (new_node_cnt + score_bins_[cost_bin]) > max_node_cnt_) {
101  pruning_cost = min_cost_ + ((cost_bin * cost_range) / kScoreBins);
102  break;
103  }
104  new_node_cnt += score_bins_[cost_bin];
105  }
106 
107  // prune out all the nodes above this cost
108  for (int node_idx = new_node_cnt = 0; node_idx < node_cnt_; node_idx++) {
109  // prune this node out
110  if (node_array_[node_idx]->BestCost() > pruning_cost ||
111  new_node_cnt > max_node_cnt_) {
112  delete node_array_[node_idx];
113  } else {
114  // keep it
115  node_array_[new_node_cnt++] = node_array_[node_idx];
116  }
117  }
118  node_cnt_ = new_node_cnt;
119 }
120 
121 // sort all nodes
123  if (node_cnt_ > 0 && node_array_ != NULL) {
124  qsort(node_array_, node_cnt_, sizeof(*node_array_),
126  }
127 }
128 
129 // add a new node
131  SearchNode *parent_node,
132  CubeRecoContext *cntxt) {
133  // init if necessary
134  if (init_ == false && Init() == false) {
135  return NULL;
136  }
137 
138  // find out if we have an node with the same edge
139  // look in the hash table
140  SearchNode *new_node = node_hash_table_->Lookup(edge, parent_node);
141  // node does not exist
142  if (new_node == NULL) {
143  new_node = new SearchNode(cntxt, parent_node, reco_cost, edge, col_idx_);
144 
145  // if the max node count has already been reached, check if the cost of
146  // the new node exceeds the max cost. This indicates that it will be pruned
147  // and so there is no point adding it
148  if (node_cnt_ >= max_node_cnt_ && new_node->BestCost() > max_cost_) {
149  delete new_node;
150  return NULL;
151  }
152 
153  // expand the node buffer if necc
154  if ((node_cnt_ % kNodeAllocChunk) == 0) {
155  // alloc a new buff
156  SearchNode **new_node_buff =
157  new SearchNode *[node_cnt_ + kNodeAllocChunk];
158 
159  // free existing after copying contents
160  if (node_array_ != NULL) {
161  memcpy(new_node_buff, node_array_, node_cnt_ * sizeof(*new_node_buff));
162  delete []node_array_;
163  }
164 
165  node_array_ = new_node_buff;
166  }
167 
168  // add the node to the hash table only if it is non-OOD edge
169  // because the langmod state is not unique
170  if (edge->IsOOD() == false) {
171  if (!node_hash_table_->Insert(edge, new_node)) {
172  tprintf("Hash table full!!!");
173  delete new_node;
174  return NULL;
175  }
176  }
177 
178  node_array_[node_cnt_++] = new_node;
179 
180  } else {
181  // node exists before
182  // if no update occurred, return NULL
183  if (new_node->UpdateParent(parent_node, reco_cost, edge) == false) {
184  new_node = NULL;
185  }
186 
187  // free the edge
188  delete edge;
189  }
190 
191  // update Min and Max Costs
192  if (new_node != NULL) {
193  if (min_cost_ > new_node->BestCost()) {
194  min_cost_ = new_node->BestCost();
195  }
196 
197  if (max_cost_ < new_node->BestCost()) {
198  max_cost_ = new_node->BestCost();
199  }
200  }
201 
202  return new_node;
203 }
204 
206  SearchNode *best_node = NULL;
207 
208  for (int node_idx = 0; node_idx < node_cnt_; node_idx++) {
209  if (best_node == NULL ||
210  best_node->BestCost() > node_array_[node_idx]->BestCost()) {
211  best_node = node_array_[node_idx];
212  }
213  }
214 
215  return best_node;
216 }
217 } // namespace tesseract
bool UpdateParent(SearchNode *new_parent, int new_reco_cost, LangModEdge *new_edge)
Definition: search_node.cpp:77
bool Insert(LangModEdge *lang_mod_edge, SearchNode *srch_node)
Definition: search_node.h:119
virtual bool IsOOD() const =0
#define tprintf(...)
Definition: tprintf.h:31
SearchNode * Lookup(LangModEdge *lang_mod_edge, SearchNode *parent_node)
Definition: search_node.h:137
static int SearchNodeComparer(const void *node1, const void *node2)
Definition: search_node.h:75
SearchNode * AddNode(LangModEdge *edge, int score, SearchNode *parent, CubeRecoContext *cntxt)
SearchColumn(int col_idx, int max_node_cnt)