tesseract  3.05.02
dawg.h
Go to the documentation of this file.
1 /* -*-C-*-
2  ********************************************************************************
3  *
4  * File: dawg.h (Formerly dawg.h)
5  * Description: Definition of a class that represents Directed Acyclic Word
6  * Graph (DAWG), functions to build and manipulate the DAWG.
7  * Author: Mark Seaman, SW Productivity
8  * Created: Fri Oct 16 14:37:00 1987
9  * Modified: Wed Jun 19 16:50:24 1991 (Mark Seaman) marks@hpgrlt
10  * Language: C
11  * Package: N/A
12  * Status: Reusable Software Component
13  *
14  * (c) Copyright 1987, Hewlett-Packard Company.
15  ** Licensed under the Apache License, Version 2.0 (the "License");
16  ** you may not use this file except in compliance with the License.
17  ** You may obtain a copy of the License at
18  ** http://www.apache.org/licenses/LICENSE-2.0
19  ** Unless required by applicable law or agreed to in writing, software
20  ** distributed under the License is distributed on an "AS IS" BASIS,
21  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22  ** See the License for the specific language governing permissions and
23  ** limitations under the License.
24  *
25  *********************************************************************************/
26 
27 #ifndef DICT_DAWG_H_
28 #define DICT_DAWG_H_
29 
30 /*----------------------------------------------------------------------
31  I n c l u d e s
32 ----------------------------------------------------------------------*/
33 
34 #include "elst.h"
35 #include "ratngs.h"
36 #include "params.h"
37 #include "tesscallback.h"
38 
39 #ifndef __GNUC__
40 #ifdef _WIN32
41 #define NO_EDGE (inT64) 0xffffffffffffffffi64
42 #endif /*_WIN32*/
43 #else
44 #define NO_EDGE (inT64) 0xffffffffffffffffll
45 #endif /*__GNUC__*/
46 
47 /*----------------------------------------------------------------------
48  T y p e s
49 ----------------------------------------------------------------------*/
50 class UNICHARSET;
51 
52 typedef uinT64 EDGE_RECORD;
54 typedef inT64 EDGE_REF;
55 typedef inT64 NODE_REF;
56 typedef EDGE_REF *NODE_MAP;
57 
58 namespace tesseract {
59 
60 struct NodeChild {
64  NodeChild(): unichar_id(INVALID_UNICHAR_ID), edge_ref(NO_EDGE) {}
65 };
66 
70 
71 enum DawgType {
76 
77  DAWG_TYPE_COUNT // number of enum entries
78 };
79 
80 /*----------------------------------------------------------------------
81  C o n s t a n t s
82 ----------------------------------------------------------------------*/
83 
84 #define FORWARD_EDGE (inT32) 0
85 #define BACKWARD_EDGE (inT32) 1
86 #define MAX_NODE_EDGES_DISPLAY (inT64) 100
87 #define MARKER_FLAG (inT64) 1
88 #define DIRECTION_FLAG (inT64) 2
89 #define WERD_END_FLAG (inT64) 4
90 #define LETTER_START_BIT 0
91 #define NUM_FLAG_BITS 3
92 #define REFFORMAT "%lld"
93 
94 static const bool kDawgSuccessors[DAWG_TYPE_COUNT][DAWG_TYPE_COUNT] = {
95  { 0, 1, 1, 0 }, // for DAWG_TYPE_PUNCTUATION
96  { 1, 0, 0, 0 }, // for DAWG_TYPE_WORD
97  { 1, 0, 0, 0 }, // for DAWG_TYPE_NUMBER
98  { 0, 0, 0, 0 }, // for DAWG_TYPE_PATTERN
99 };
100 
101 static const char kWildcard[] = "*";
102 
103 
104 /*----------------------------------------------------------------------
105  C l a s s e s a n d S t r u c t s
106 ----------------------------------------------------------------------*/
107 //
117 //
118 class Dawg {
119  public:
121  static const inT16 kDawgMagicNumber = 42;
125  static const UNICHAR_ID kPatternUnicharID = 0;
126 
127  inline DawgType type() const { return type_; }
128  inline const STRING &lang() const { return lang_; }
129  inline PermuterType permuter() const { return perm_; }
130 
131  virtual ~Dawg() {}
132 
134  bool word_in_dawg(const WERD_CHOICE &word) const;
135 
136  // Returns true if the given word prefix is not contraindicated by the dawg.
137  // If requires_complete is true, then the exact complete word must be present.
138  bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const;
139 
142  int check_for_words(const char *filename,
143  const UNICHARSET &unicharset,
144  bool enable_wildcard) const;
145 
146  // For each word in the Dawg, call the given (permanent) callback with the
147  // WERD_CHOICE version of the word.
148  void iterate_words(const UNICHARSET &unicharset,
150 
151  // For each word in the Dawg, call the given (permanent) callback with the
152  // text (UTF-8) version of the word.
153  void iterate_words(const UNICHARSET &unicharset,
154  TessCallback1<const char *> *cb) const;
155 
156  // Pure virtual function that should be implemented by the derived classes.
157 
159  virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
160  bool word_end) const = 0;
161 
164  virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec,
165  bool word_end) const = 0;
166 
169  virtual NODE_REF next_node(EDGE_REF edge_ref) const = 0;
170 
173  virtual bool end_of_word(EDGE_REF edge_ref) const = 0;
174 
176  virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const = 0;
177 
180  virtual void print_node(NODE_REF node, int max_num_edges) const = 0;
181 
184  virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id,
185  const UNICHARSET &unicharset,
186  GenericVector<UNICHAR_ID> *vec) const {
187  (void)unichar_id;
188  (void)unicharset;
189  (void)vec;
190  }
191 
196  EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const {
197  (void)edge_ref;
198  (void)unichar_id;
199  (void)word_end;
200  return false;
201  }
202 
203  protected:
204  Dawg() {}
205 
207  inline NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const {
208  return ((edge_rec & next_node_mask_) >> next_node_start_bit_);
209  }
211  inline bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const {
212  return (edge_rec & (MARKER_FLAG << flag_start_bit_)) != 0;
213  }
215  inline int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const {
216  return ((edge_rec & (DIRECTION_FLAG << flag_start_bit_))) ?
218  }
220  inline bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const {
221  return (edge_rec & (WERD_END_FLAG << flag_start_bit_)) != 0;
222  }
225  const EDGE_RECORD &edge_rec) const {
226  return ((edge_rec & letter_mask_) >> LETTER_START_BIT);
227  }
230  EDGE_RECORD *edge_rec, EDGE_REF value) {
231  *edge_rec &= (~next_node_mask_);
232  *edge_rec |= ((value << next_node_start_bit_) & next_node_mask_);
233  }
235  inline void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec) {
236  *edge_rec |= (MARKER_FLAG << flag_start_bit_);
237  }
246  bool word_end,
247  UNICHAR_ID unichar_id,
248  const EDGE_RECORD &edge_rec) const {
249  UNICHAR_ID curr_unichar_id = unichar_id_from_edge_rec(edge_rec);
250  NODE_REF curr_next_node = next_node_from_edge_rec(edge_rec);
251  bool curr_word_end = end_of_word_from_edge_rec(edge_rec);
252  if (edge_rec_match(next_node, word_end, unichar_id, curr_next_node,
253  curr_word_end, curr_unichar_id)) return 0;
254  if (unichar_id > curr_unichar_id) return 1;
255  if (unichar_id == curr_unichar_id) {
256  if (next_node > curr_next_node) return 1;
257  if (next_node == curr_next_node) {
258  if (word_end > curr_word_end) return 1;
259  }
260  }
261  return -1;
262  }
267  bool word_end,
268  UNICHAR_ID unichar_id,
269  NODE_REF other_next_node,
270  bool other_word_end,
271  UNICHAR_ID other_unichar_id) const {
272  return ((unichar_id == other_unichar_id) &&
273  (next_node == NO_EDGE || next_node == other_next_node) &&
274  (!word_end || (word_end == other_word_end)));
275  }
276 
279  void init(DawgType type, const STRING &lang,
280  PermuterType perm, int unicharset_size, int debug_level);
281 
287  bool match_words(WERD_CHOICE *word, inT32 index,
288  NODE_REF node, UNICHAR_ID wildcard) const;
289 
290  // Recursively iterate over all words in a dawg (see public iterate_words).
291  void iterate_words_rec(const WERD_CHOICE &word_so_far,
292  NODE_REF to_explore,
294 
295  // Member Variables.
300  // Variables to construct various edge masks. Formerly:
301  // #define NEXT_EDGE_MASK (inT64) 0xfffffff800000000i64
302  // #define FLAGS_MASK (inT64) 0x0000000700000000i64
303  // #define LETTER_MASK (inT64) 0x00000000ffffffffi64
310  // Level of debug statements to print to stdout.
312 };
313 
314 //
315 // DawgPosition keeps track of where we are in the primary dawg we're searching
316 // as well as where we may be in the "punctuation dawg" which may provide
317 // surrounding context.
318 //
319 // Example:
320 // punctuation dawg -- space is the "pattern character"
321 // " " // no punctuation
322 // "' '" // leading and trailing apostrophes
323 // " '" // trailing apostrophe
324 // word dawg:
325 // "cat"
326 // "cab"
327 // "cat's"
328 //
329 // DawgPosition(dawg_index, dawg_ref, punc_index, punc_ref, rtp)
330 //
331 // DawgPosition(-1, NO_EDGE, p, pe, false)
332 // We're in the punctuation dawg, no other dawg has been started.
333 // (1) If there's a pattern edge as a punc dawg child of us,
334 // for each punc-following dawg starting with ch, produce:
335 // Result: DawgPosition(k, w, p', false)
336 // (2) If there's a valid continuation in the punc dawg, produce:
337 // Result: DawgPosition(-k, NO_EDGE, p', false)
338 //
339 // DawgPosition(k, w, -1, NO_EDGE, false)
340 // We're in dawg k. Going back to punctuation dawg is not an option.
341 // Follow ch in dawg k.
342 //
343 // DawgPosition(k, w, p, pe, false)
344 // We're in dawg k. Continue in dawg k and/or go back to the punc dawg.
345 // If ending, check that the punctuation dawg is also ok to end here.
346 //
347 // DawgPosition(k, w, p, pe, true)
348 // We're back in the punctuation dawg. Continuing there is the only option.
349 struct DawgPosition {
351  : dawg_index(-1), dawg_ref(NO_EDGE), punc_index(-1), punc_ref(NO_EDGE),
     // punc_index is initialized too (to -1, like dawg_index) so that a
     // default-constructed position never exposes an indeterminate value to
     // operator==, which compares every field including punc_index.
352  back_to_punc(false) {}
353  DawgPosition(int dawg_idx, EDGE_REF dawgref,
354  int punc_idx, EDGE_REF puncref,
355  bool backtopunc)
356  : dawg_index(dawg_idx), dawg_ref(dawgref),
357  punc_index(punc_idx), punc_ref(puncref),
358  back_to_punc(backtopunc) {
359  }
360  bool operator==(const DawgPosition &other) const {
     // Field-by-field equality: two positions match only when both dawg and
     // punctuation indices/references and the back_to_punc flag are equal.
     // The operator is const because it only reads members, which lets const
     // DawgPosition objects and const references be compared.
361  return dawg_index == other.dawg_index &&
362  dawg_ref == other.dawg_ref &&
363  punc_index == other.punc_index &&
364  punc_ref == other.punc_ref &&
365  back_to_punc == other.back_to_punc;
366  }
367 
372  // Have we returned to the punc dawg at the end of the word?
374 };
375 
376 class DawgPositionVector : public GenericVector<DawgPosition> {
    // A GenericVector of DawgPosition with a cheap clear() and an insert that
    // rejects duplicates.
377  public:
     // Marks the vector as empty by resetting the used-size counter to zero;
     // this override does nothing else.
380  void clear() { size_used_ = 0; }
     // Appends new_pos unless an equal DawgPosition is already stored.
     // The existing entries [0, size_used_) are scanned linearly with
     // DawgPosition::operator==.
     // If debug is true, the newly added position is printed via tprintf,
     // prefixed with debug_msg.
     // Returns true if new_pos was added, false if it was already present.
384  inline bool add_unique(const DawgPosition &new_pos,
385  bool debug,
386  const char *debug_msg) {
387  for (int i = 0; i < size_used_; ++i) {
388  if (data_[i] == new_pos) return false;
389  }
390  push_back(new_pos);
391  if (debug) {
     // Prints dawg index, dawg ref and punc ref; " returned" is appended when
     // back_to_punc is set.
392  tprintf("%s[%d, " REFFORMAT "] [punc: " REFFORMAT "%s]\n",
393  debug_msg, new_pos.dawg_index, new_pos.dawg_ref,
394  new_pos.punc_ref, new_pos.back_to_punc ? " returned" : "");
395  }
396  return true;
397  }
398 };
399 
400 //
407 //
408 class SquishedDawg : public Dawg {
409  public:
410  SquishedDawg(FILE *file, DawgType type, const STRING &lang,
411  PermuterType perm, int debug_level) {
412  read_squished_dawg(file, type, lang, perm, debug_level);
413  num_forward_edges_in_node0 = num_forward_edges(0);
414  }
416  const STRING &lang, PermuterType perm, int debug_level) {
417  FILE *file = fopen(filename, "rb");
418  if (file == NULL) {
419  tprintf("Failed to open dawg file %s\n", filename);
420  exit(1);
421  }
422  read_squished_dawg(file, type, lang, perm, debug_level);
423  num_forward_edges_in_node0 = num_forward_edges(0);
424  fclose(file);
425  }
426  SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type,
427  const STRING &lang, PermuterType perm,
428  int unicharset_size, int debug_level) :
429  edges_(edges), num_edges_(num_edges) {
430  init(type, lang, perm, unicharset_size, debug_level);
431  num_forward_edges_in_node0 = num_forward_edges(0);
432  if (debug_level > 3) print_all("SquishedDawg:");
433  }
434  ~SquishedDawg();
435 
436  int NumEdges() const { return num_edges_; }  // Returns num_edges_; const since it only reads it.
437 
439  EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id,
440  bool word_end) const;
441 
445  bool word_end) const {
     // Appends to vec one NodeChild (unichar id + edge ref) for each edge of
     // the given node. Edges are visited consecutively starting at index
     // `node` until the edge flagged as the last one has been processed.
     // When word_end is true, only edges carrying the end-of-word flag are
     // appended.
446  EDGE_REF edge = node;
     // NO_EDGE must be tested first: edge_occupied() indexes edges_[edge], so
     // calling it with the NO_EDGE sentinel would read outside the edge array.
447  if (edge == NO_EDGE || !edge_occupied(edge)) return;
448  assert(forward_edge(edge)); // we don't expect any backward edges to
449  do { // be present when this function is called
450  if (!word_end || end_of_word_from_edge_rec(edges_[edge])) {
451  vec->push_back(NodeChild(unichar_id_from_edge_rec(edges_[edge]), edge));
452  }
453  } while (!last_edge(edge++));
454  }
455 
459  return next_node_from_edge_rec((edges_[edge]));
460  }
461 
464  bool end_of_word(EDGE_REF edge_ref) const {
465  return end_of_word_from_edge_rec((edges_[edge_ref]));
466  }
467 
469  UNICHAR_ID edge_letter(EDGE_REF edge_ref) const {
470  return unichar_id_from_edge_rec((edges_[edge_ref]));
471  }
472 
475  void print_node(NODE_REF node, int max_num_edges) const;
476 
478  void write_squished_dawg(FILE *file);
479 
482  void write_squished_dawg(const char *filename) {
483  FILE *file = fopen(filename, "wb");
484  if (file == NULL) {
485  tprintf("Error opening %s\n", filename);
486  exit(1);
487  }
488  this->write_squished_dawg(file);
489  fclose(file);
490  }
491 
492  private:
494  inline void set_next_node(EDGE_REF edge_ref, EDGE_REF value) {
495  set_next_node_in_edge_rec(&(edges_[edge_ref]), value);
496  }
498  inline void set_empty_edge(EDGE_REF edge_ref) {
499  (edges_[edge_ref] = next_node_mask_);
500  }
502  inline void clear_all_edges() {
503  for (int edge = 0; edge < num_edges_; edge++) set_empty_edge(edge);
504  }
506  inline void clear_marker_flag(EDGE_REF edge_ref) {
507  (edges_[edge_ref] &= ~(MARKER_FLAG << flag_start_bit_));
508  }
510  inline bool forward_edge(EDGE_REF edge_ref) const {
511  return (edge_occupied(edge_ref) &&
512  (FORWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
513  }
515  inline bool backward_edge(EDGE_REF edge_ref) const {
516  return (edge_occupied(edge_ref) &&
517  (BACKWARD_EDGE == direction_from_edge_rec(edges_[edge_ref])));
518  }
520  inline bool edge_occupied(EDGE_REF edge_ref) const {
521  return (edges_[edge_ref] != next_node_mask_);
522  }
524  inline bool last_edge(EDGE_REF edge_ref) const {
525  return (edges_[edge_ref] & (MARKER_FLAG << flag_start_bit_)) != 0;
526  }
527 
529  inT32 num_forward_edges(NODE_REF node) const;
530 
532  void read_squished_dawg(FILE *file, DawgType type, const STRING &lang,
533  PermuterType perm, int debug_level);
534 
536  void print_edge(EDGE_REF edge) const;
537 
539  void print_all(const char* msg) {
540  tprintf("\n__________________________\n%s\n", msg);
541  for (int i = 0; i < num_edges_; ++i) print_edge(i);
542  tprintf("__________________________\n");
543  }
545  NODE_MAP build_node_map(inT32 *num_nodes) const;
546 
547 
548  // Member variables.
549  EDGE_ARRAY edges_;
550  int num_edges_;
551  int num_forward_edges_in_node0;
552 };
553 
554 } // namespace tesseract
555 
556 #endif // DICT_DAWG_H_
void set_next_node_in_edge_rec(EDGE_RECORD *edge_rec, EDGE_REF value)
Sets the next node link for this edge in the Dawg.
Definition: dawg.h:229
virtual EDGE_REF pattern_loop_edge(EDGE_REF edge_ref, UNICHAR_ID unichar_id, bool word_end) const
Definition: dawg.h:195
bool prefix_in_dawg(const WERD_CHOICE &prefix, bool requires_complete) const
Definition: dawg.cpp:48
static const UNICHAR_ID kPatternUnicharID
Definition: dawg.h:125
bool marker_flag_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the marker flag of this edge.
Definition: dawg.h:211
uinT64 letter_mask_
Definition: dawg.h:309
virtual void print_node(NODE_REF node, int max_num_edges) const =0
bool operator==(const DawgPosition &other)
Definition: dawg.h:360
void write_squished_dawg(const char *filename)
Definition: dawg.h:482
EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const
Returns the edge that corresponds to the letter out of this node.
Definition: dawg.cpp:201
uinT64 flags_mask_
Definition: dawg.h:308
inT64 EDGE_REF
Definition: dawg.h:54
short inT16
Definition: host.h:33
NODE_REF next_node(EDGE_REF edge) const
Definition: dawg.h:458
inT64 NODE_REF
Definition: dawg.h:55
#define LETTER_START_BIT
Definition: dawg.h:90
DawgPosition(int dawg_idx, EDGE_REF dawgref, int punc_idx, EDGE_REF puncref, bool backtopunc)
Definition: dawg.h:353
virtual bool end_of_word(EDGE_REF edge_ref) const =0
uinT64 next_node_mask_
Definition: dawg.h:307
#define MARKER_FLAG
Definition: dawg.h:87
PermuterType
Definition: ratngs.h:240
EDGE_REF edge_ref
Definition: dawg.h:62
int next_node_start_bit_
Definition: dawg.h:306
DawgType
Definition: dawg.h:71
void write_squished_dawg(FILE *file)
Writes the squished/reduced Dawg to a file.
Definition: dawg.cpp:387
void print_node(NODE_REF node, int max_num_edges) const
Definition: dawg.cpp:246
static const inT16 kDawgMagicNumber
Magic number to determine endianness when reading the Dawg from file.
Definition: dawg.h:121
virtual ~Dawg()
Definition: dawg.h:131
EDGE_REF * NODE_MAP
Definition: dawg.h:56
bool match_words(WERD_CHOICE *word, inT32 index, NODE_REF node, UNICHAR_ID wildcard) const
Definition: dawg.cpp:144
EDGE_RECORD * EDGE_ARRAY
Definition: dawg.h:53
int given_greater_than_edge_rec(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, const EDGE_RECORD &edge_rec) const
Definition: dawg.h:245
PermuterType perm_
Permuter code that should be used if the word is found in this Dawg.
Definition: dawg.h:299
STRING lang_
Definition: dawg.h:297
DawgType type() const
Definition: dawg.h:127
int push_back(DawgPosition object)
SquishedDawg(const char *filename, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:415
int direction_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the direction flag of this edge.
Definition: dawg.h:215
int unicharset_size_
Definition: dawg.h:304
UNICHAR_ID edge_letter(EDGE_REF edge_ref) const
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
Definition: dawg.h:469
int check_for_words(const char *filename, const UNICHARSET &unicharset, bool enable_wildcard) const
Definition: dawg.cpp:73
bool end_of_word_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns true if this edge marks the end of a word.
Definition: dawg.h:220
UNICHAR_ID unichar_id_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns UNICHAR_ID recorded in this edge.
Definition: dawg.h:224
virtual UNICHAR_ID edge_letter(EDGE_REF edge_ref) const =0
Returns UNICHAR_ID stored in the edge indicated by the given EDGE_REF.
void iterate_words(const UNICHARSET &unicharset, TessCallback1< const WERD_CHOICE *> *cb) const
Definition: dawg.cpp:105
DawgType type_
Definition: dawg.h:296
virtual void unichar_id_to_patterns(UNICHAR_ID unichar_id, const UNICHARSET &unicharset, GenericVector< UNICHAR_ID > *vec) const
Definition: dawg.h:184
unsigned long long int uinT64
Definition: host.h:42
SIGNED char inT8
Definition: host.h:31
EDGE_REF dawg_ref
Definition: dawg.h:369
long long int inT64
Definition: host.h:41
virtual EDGE_REF edge_char_of(NODE_REF node, UNICHAR_ID unichar_id, bool word_end) const =0
Returns the edge that corresponds to the letter out of this node.
PermuterType permuter() const
Definition: dawg.h:129
virtual NODE_REF next_node(EDGE_REF edge_ref) const =0
int debug_level_
Definition: dawg.h:311
void set_marker_flag_in_edge_rec(EDGE_RECORD *edge_rec)
Sets this edge record to be the last one in a sequence of edges.
Definition: dawg.h:235
int inT32
Definition: host.h:35
#define tprintf(...)
Definition: tprintf.h:31
Definition: strngs.h:44
#define REFFORMAT
Definition: dawg.h:92
bool edge_rec_match(NODE_REF next_node, bool word_end, UNICHAR_ID unichar_id, NODE_REF other_next_node, bool other_word_end, UNICHAR_ID other_unichar_id) const
Definition: dawg.h:266
#define BACKWARD_EDGE
Definition: dawg.h:85
#define DIRECTION_FLAG
Definition: dawg.h:88
#define WERD_END_FLAG
Definition: dawg.h:89
EDGE_REF punc_ref
Definition: dawg.h:371
bool end_of_word(EDGE_REF edge_ref) const
Definition: dawg.h:464
SquishedDawg(EDGE_ARRAY edges, int num_edges, DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.h:426
UNICHAR_ID unichar_id
Definition: dawg.h:61
GenericVector< SuccessorList * > SuccessorListsVector
Definition: dawg.h:69
bool add_unique(const DawgPosition &new_pos, bool debug, const char *debug_msg)
Definition: dawg.h:384
int flag_start_bit_
Definition: dawg.h:305
GenericVector< NodeChild > NodeChildVector
Definition: dawg.h:67
NODE_REF next_node_from_edge_rec(const EDGE_RECORD &edge_rec) const
Returns the next node visited by following this edge.
Definition: dawg.h:207
bool word_in_dawg(const WERD_CHOICE &word) const
Returns true if the given word is in the Dawg.
Definition: dawg.cpp:69
void iterate_words_rec(const WERD_CHOICE &word_so_far, NODE_REF to_explore, TessCallback1< const WERD_CHOICE *> *cb) const
Definition: dawg.cpp:126
void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const
Definition: dawg.h:444
virtual void unichar_ids_of(NODE_REF node, NodeChildVector *vec, bool word_end) const =0
void init(DawgType type, const STRING &lang, PermuterType perm, int unicharset_size, int debug_level)
Definition: dawg.cpp:176
const STRING & lang() const
Definition: dawg.h:128
#define FORWARD_EDGE
Definition: dawg.h:84
SquishedDawg(FILE *file, DawgType type, const STRING &lang, PermuterType perm, int debug_level)
Definition: dawg.h:410
uinT64 EDGE_RECORD
Definition: dawg.h:50
NodeChild(UNICHAR_ID id, EDGE_REF ref)
Definition: dawg.h:63
int UNICHAR_ID
Definition: unichar.h:33
GenericVector< int > SuccessorList
Definition: dawg.h:68