32 word_mode_ = word_mode;
36 void BeamSearch::Cleanup() {
38 for (
int col = 0; col < col_cnt_; col++) {
59 lm_parent_edge, &edge_cnt);
62 for (
int edge = 0; edge < edge_cnt; edge++) {
66 !lm_edges[edge]->
IsEOW()) {
68 delete lm_edges[edge];
74 if (char_alt_list && char_alt_list->
AltCount() > 0) {
76 lm_edges[edge]->ClassID()));
78 recognition_cost += extra_cost;
83 if (recognition_cost >= 0) {
84 out_col->
AddNode(lm_edges[edge], recognition_cost, parent_node,
87 delete lm_edges[edge];
102 fprintf(stderr,
"Cube ERROR (BeamSearch::Search): could not construct " 112 if (seg_pt_cnt_ < 0) {
115 col_cnt_ = seg_pt_cnt_ + 1;
118 if (seg_pt_cnt_ > 128) {
119 fprintf(stderr,
"Cube ERROR (BeamSearch::Search): segment point count is " 120 "suspiciously high; bailing out\n");
126 memset(col_, 0, col_cnt_ *
sizeof(*col_));
129 for (
int end_seg = 1; end_seg <= (seg_pt_cnt_ + 1); end_seg++) {
136 for (
int strt_seg = init_seg; strt_seg < end_seg; strt_seg++) {
137 int parent_nodes_cnt;
142 parent_nodes_cnt = 1;
146 parent_nodes_cnt = col_[strt_seg - 1]->
NodeCount();
147 parent_nodes = col_[strt_seg - 1]->
Nodes();
154 for (
int parent_idx = 0; parent_idx < parent_nodes_cnt; parent_idx++) {
156 SearchNode *parent_node = !parent_nodes ? NULL
157 : parent_nodes[parent_idx];
162 int contig_cost = srch_obj->
NoSpaceCost(strt_seg - 1, end_seg - 1);
166 int no_space_cost = 0;
167 if (!word_mode_ && strt_seg > 0) {
168 no_space_cost = srch_obj->
NoSpaceCost(strt_seg - 1);
174 CreateChildren(col_[end_seg - 1], lang_mod, parent_node,
175 lm_parent_edge, char_alt_list,
176 contig_cost + no_space_cost);
180 if (!word_mode_ && strt_seg > 0) {
184 int space_cost = srch_obj->
SpaceCost(strt_seg - 1);
189 CreateChildren(col_[end_seg - 1], lang_mod, parent_node, NULL,
190 char_alt_list, contig_cost + space_cost);
198 col_[end_seg - 1]->
Prune();
204 WordAltList *alt_list = CreateWordAltList(srch_obj);
211 int node_cnt = col_[col_cnt_ - 1]->
NodeCount();
218 best_presorted_node_idx_ = 0;
226 for (
int node_idx = 0; node_idx < node_cnt; node_idx++) {
228 int recognition_cost = srch_nodes[node_idx]->
BestCost();
231 int size_cost =
SizeCost(srch_obj, srch_nodes[node_idx], &ch_buff);
236 int bigram_cost = !bigrams ? 0 :
239 int unigram_cost = !word_unigrams ? 0 :
243 cost =
static_cast<int>(
250 alt_list->
Insert(ch_buff, cost,
251 static_cast<void *>(srch_nodes[node_idx]));
254 if (best_cost < 0 || cost < best_cost) {
255 best_presorted_node_idx_ = node_idx;
269 if (col < 0 || col >= col_cnt_ || !col_)
276 if (col_cnt_ < 1 || !col_ || !col_[col_cnt_ - 1])
279 int node_cnt = col_[col_cnt_ - 1]->
NodeCount();
281 if (node_cnt < 1 || !srch_nodes || !srch_nodes[0])
283 return srch_nodes[0];
318 int *char_cnt,
char_32 **str32,
319 Boxa **char_boxes)
const {
334 return BackTrack(srch_obj, srch_node, char_cnt, str32, char_boxes);
342 int *char_cnt,
char_32 **str32,
343 Boxa **char_boxes)
const {
354 if (char_boxes && *char_boxes) {
355 boxaDestroy(char_boxes);
359 chars = SplitByNode(srch_obj, srch_node, char_cnt, char_boxes);
371 Boxa **char_boxes)
const {
387 boxaDestroy(char_boxes);
388 *char_boxes = boxaCreate(*char_cnt);
389 if (*char_boxes == NULL)
394 CharSamp **chars =
new CharSamp *[*char_cnt];
396 int ch_idx = *char_cnt - 1;
397 int seg_pt_cnt = srch_obj->
SegPtCnt();
399 while (srch_node && ch_idx >= 0) {
401 SearchNode *parent_node = srch_node->
ParentNode();
404 int st_col = !parent_node ? 0 : parent_node->
ColIdx() + 1;
405 int st_seg_pt = st_col <= 0 ? -1 : st_col - 1;
406 int end_col = srch_node->
ColIdx();
407 int end_seg_pt = end_col >= seg_pt_cnt ? seg_pt_cnt : end_col;
410 CharSamp *samp = srch_obj->
CharSample(st_seg_pt, end_seg_pt);
416 chars[ch_idx] = samp;
419 Box *char_box = boxCreate(samp->Left(), samp->Top(),
420 samp->Width(), samp->Height());
425 boxaAddBox(*char_boxes, char_box, L_INSERT);
427 srch_node = parent_node;
433 boxaDestroy(char_boxes);
439 int char_boxa_size = boxaGetCount(*char_boxes);
440 int limit = char_boxa_size / 2;
441 for (
int i = 0; i < limit; ++i) {
443 int box2_idx = char_boxa_size - 1 - i;
444 Box *box1 = boxaGetBox(*char_boxes, box1_idx, L_CLONE);
445 Box *box2 = boxaGetBox(*char_boxes, box2_idx, L_CLONE);
446 boxaReplaceBox(*char_boxes, box2_idx, box1);
447 boxaReplaceBox(*char_boxes, box1_idx, box2);
462 chars =
BackTrack(srch_obj, node, &char_cnt, str32, NULL);
465 int size_cost = (cntxt_->
SizeModel() == NULL) ? 0 :
WordAltList * Search(SearchObject *srch_obj, LangModel *lang_mod=NULL)
virtual LangModEdge * Root()=0
virtual CharSamp * CharSample(int start_pt, int end_pt)=0
SearchNode * BestNode() const
SearchNode * ParentNode()
CharSamp ** BackTrack(SearchObject *srch_obj, int node_index, int *char_cnt, char_32 **str32, Boxa **char_boxes) const
SearchNode ** Nodes() const
SearchColumn * Column(int col_idx) const
virtual int SpaceCost(int seg_pt)=0
virtual LangModEdge ** GetEdges(CharAltList *alt_list, LangModEdge *parent_edge, int *edge_cnt)=0
WordUnigrams * WordUnigramsObj() const
virtual int NoSpaceCost(int seg_pt)=0
int MaxSegPerChar() const
LangModel * LangMod() const
void SetLabel(char_32 label)
LangModEdge * LangModelEdge()
TuningParams * Params() const
int SizeCost(SearchObject *srch_obj, SearchNode *node, char_32 **str32=NULL) const
int Cost(CharSamp **samp_array, int samp_cnt) const
int Cost(const char_32 *str, CharSet *char_set) const
CharSet * CharacterSet() const
virtual bool IsEOW() const =0
int ClassCost(int class_id) const
WordSizeModel * SizeModel() const
BeamSearch(CubeRecoContext *cntxt, bool word_mode=true)
bool Insert(char_32 *char_ptr, int cost, void *tag=NULL)
SearchNode * AddNode(LangModEdge *edge, int score, SearchNode *parent, CubeRecoContext *cntxt)
const char_32 * NodeString()
int Cost(const char_32 *str32, LangModel *lang_mod, CharSet *char_set) const
char_32 * Alt(int alt) const
CharBigrams * Bigrams() const
double WordUnigramWgt() const
double CharBigramWgt() const
virtual CharAltList * RecognizeSegment(int start_pt, int end_pt)=0