tesseract  3.05.02
tess_lang_mod_edge.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: tess_lang_mod_edge.cpp
3  * Description: Implementation of the Tesseract Language Model Edge Class
4  * Author: Ahmad Abdulkader
5  * Created: 2008
6  *
7  * (C) Copyright 2008, Google Inc.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  *
18  **********************************************************************/
19 
20 #include "tess_lang_mod_edge.h"
21 #include "const.h"
22 #include "unichar.h"
23 
24 
25 
26 namespace tesseract {
27 // OOD constructor
// Constructor body; the signature line is absent from this listing. The body
// reads two arguments, `cntxt` and `class_id`.
// This variant attaches no dawg to the edge: dawg_ is NULL and the edge
// references and edge mask are all zero.
29  root_ = false;
30  cntxt_ = cntxt;
31  dawg_ = NULL;
32  start_edge_ = 0;
33  end_edge_ = 0;
34  edge_mask_ = 0;
35  class_id_ = class_id;
// Look up the string for class_id through the context's character set.
// NOTE(review): cntxt is dereferenced without a null check; str_ receives
// whatever ClassString() returns -- presumably a borrowed pointer, confirm
// against the header.
36  str_ = cntxt_->CharacterSet()->ClassString(class_id);
// Cost() is defined outside this file; it is called last, after every other
// member assigned here has been set.
37  path_cost_ = Cost();
38 }
39 
44  const Dawg *dawg, EDGE_REF edge_idx, int class_id) {
// Constructor for an edge that refers to a single dawg edge: the same
// edge_idx is stored as both start_edge_ and end_edge_. The first part of the
// signature is absent from this listing; the body also reads `cntxt`.
// The dawg pointer is stored as given; nothing in this body copies or
// releases it.
45  root_ = false;
46  cntxt_ = cntxt;
47  dawg_ = dawg;
48  start_edge_ = edge_idx;
49  end_edge_ = edge_idx;
50  edge_mask_ = 0;
51  class_id_ = class_id;
// Look up the string for class_id through the context's character set.
// NOTE(review): cntxt is dereferenced without a null check.
52  str_ = cntxt_->CharacterSet()->ClassString(class_id);
// Cost() is defined outside this file; it is called last, after every other
// member assigned here has been set.
53  path_cost_ = Cost();
54 }
55 
60  EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
61  int class_id) {
// Constructor for an edge with distinct start and end edge references. The
// first part of the signature is absent from this listing; the body also
// reads `cntxt` and `dawg`.
// Apart from taking separate start/end references, the assignments match the
// single-edge constructor in this file.
62  root_ = false;
63  cntxt_ = cntxt;
64  dawg_ = dawg;
65  start_edge_ = start_edge_idx;
66  end_edge_ = end_edge_idx;
67  edge_mask_ = 0;
68  class_id_ = class_id;
// Look up the string for class_id through the context's character set.
// NOTE(review): cntxt is dereferenced without a null check.
69  str_ = cntxt_->CharacterSet()->ClassString(class_id);
// Cost() is defined outside this file; it is called last, after every other
// member assigned here has been set.
70  path_cost_ = Cost();
71 }
72 
// Builds a text description of this edge and returns it. The signature line
// is absent from this listing.
// Output format: "<dawg>(<edge>)<eow>, Wtd Dawg Cost=<cost>", where <dawg> is
// one of OOD/NUM/Main/User/Doc/N/A, <edge> is start_edge_ as a decimal int
// with optional "-LP"/"-TP" suffixes, and <eow> is "-EOW-" when IsEOW() is
// true.
// The result lives in a 256-byte buffer allocated here with new[] and is not
// freed or stored anywhere in this function, so the caller has to release it
// with delete[].
74  char *char_ptr = new char[256];
75 
76  char dawg_str[256];
77  char edge_str[32];
// dawg_ is first compared against the DAWG_OOD and DAWG_NUMBER macros cast to
// pointers; only when neither matches is it dereferenced to read permuter().
// NOTE(review): a NULL dawg_ (one constructor in this file stores NULL) is
// safe here only if one of those macros expands to 0 -- confirm in the header.
78  if (dawg_ == (Dawg *)DAWG_OOD) {
79  strcpy(dawg_str, "OOD");
80  } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
81  strcpy(dawg_str, "NUM");
82  } else if (dawg_->permuter() == SYSTEM_DAWG_PERM) {
83  strcpy(dawg_str, "Main");
84  } else if (dawg_->permuter() == USER_DAWG_PERM) {
85  strcpy(dawg_str, "User");
86  } else if (dawg_->permuter() == DOC_DAWG_PERM) {
87  strcpy(dawg_str, "Doc");
88  } else {
89  strcpy(dawg_str, "N/A");
90  }
91 
// start_edge_ is narrowed to int before formatting, so only the int-sized
// value is printed. The punctuation suffixes are appended based on edge_mask_.
92  sprintf(edge_str, "%d", static_cast<int>(start_edge_));
93  if (IsLeadingPuncEdge(edge_mask_)) {
94  strcat(edge_str, "-LP");
95  }
96  if (IsTrailingPuncEdge(edge_mask_)) {
97  strcat(edge_str, "-TP");
98  }
// NOTE(review): sprintf/strcat/strcpy are unbounded; the fixed literals and
// int-sized numbers written here look well within the 32- and 256-byte
// buffers, but snprintf would make that explicit. path_cost_ is printed with
// %d -- presumably an int, confirm against the header.
99  sprintf(char_ptr, "%s(%s)%s, Wtd Dawg Cost=%d",
100  dawg_str, edge_str, IsEOW() ? "-EOW-" : "", path_cost_);
101 
102  return char_ptr;
103 }
104 
106  const Dawg *dawg,
107  NODE_REF parent_node,
108  LangModEdge **edge_array) {
// Creates one TessLangModEdge for each child of parent_node in dawg that has
// a valid unichar id, writes the new edges into edge_array starting at index
// 0, and returns how many were written. The first part of the signature is
// absent from this listing; the body also reads `cntxt`.
// NOTE(review): edge_array is written without any bounds check, so the caller
// must supply enough room for every child. Each edge is allocated with `new`
// and not freed here -- presumably the caller takes ownership, confirm.
109  int edge_cnt = 0;
110  NodeChildVector vec;
// The third argument is the `word_end` flag of unichar_ids_of(); false is
// passed here.
111  dawg->unichar_ids_of(parent_node, &vec, false); // find all children
112  for (int i = 0; i < vec.size(); ++i) {
113  const NodeChild &child = vec[i];
// Children carrying INVALID_UNICHAR_ID are skipped and do not count.
114  if (child.unichar_id == INVALID_UNICHAR_ID) continue;
// Uses the (cntxt, dawg, edge, class_id) constructor: the child's edge_ref
// becomes the edge and its unichar_id is passed as the class id.
115  edge_array[edge_cnt++] =
116  new TessLangModEdge(cntxt, dawg, child.edge_ref, child.unichar_id);
117  }
118  return edge_cnt;
119 }
120 }
#define IsLeadingPuncEdge(edge_mask)
inT64 EDGE_REF
Definition: dawg.h:54
inT64 NODE_REF
Definition: dawg.h:55
#define IsTrailingPuncEdge(edge_mask)
EDGE_REF edge_ref
Definition: dawg.h:62
static int CreateChildren(CubeRecoContext *cntxt, const Dawg *edges, NODE_REF edge_reg, LangModEdge **lm_edges)
TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array, EDGE_REF edge, int class_id)
PermuterType permuter() const
Definition: dawg.h:129
CharSet * CharacterSet() const
int size() const
Definition: genericvector.h:72
#define DAWG_OOD
UNICHAR_ID unichar_id
Definition: dawg.h:61
#define DAWG_NUMBER
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
const char_32 * ClassString(int class_id) const
Definition: char_set.h:104