tesseract  3.05.02
adaptive.cpp
Go to the documentation of this file.
1 /******************************************************************************
2  ** Filename: adaptive.cpp
3  ** Purpose: Adaptive matcher.
4  ** Author: Dan Johnson
5  ** History: Fri Mar 8 10:00:21 1991, DSJ, Created.
6  **
7  ** (c) Copyright Hewlett-Packard Company, 1988.
8  ** Licensed under the Apache License, Version 2.0 (the "License");
9  ** you may not use this file except in compliance with the License.
10  ** You may obtain a copy of the License at
11  ** http://www.apache.org/licenses/LICENSE-2.0
12  ** Unless required by applicable law or agreed to in writing, software
13  ** distributed under the License is distributed on an "AS IS" BASIS,
14  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  ** See the License for the specific language governing permissions and
16  ** limitations under the License.
17  ******************************************************************************/
18 
19 /*----------------------------------------------------------------------------
20  Include Files and Type Defines
21 ----------------------------------------------------------------------------*/
22 #include "adaptive.h"
23 #include "emalloc.h"
24 #include "freelist.h"
25 #include "globals.h"
26 #include "classify.h"
27 
28 #ifdef __UNIX__
29 #include <assert.h>
30 #endif
31 #include <stdio.h>
32 
33 /*----------------------------------------------------------------------------
34  Public Code
35 ----------------------------------------------------------------------------*/
36 /*---------------------------------------------------------------------------*/
50  ADAPT_CLASS Class,
51  CLASS_ID ClassId) {
52  INT_CLASS IntClass;
53 
54  assert (Templates != NULL);
55  assert (Class != NULL);
56  assert (LegalClassId (ClassId));
57  assert (UnusedClassIdIn (Templates->Templates, ClassId));
58  assert (Class->NumPermConfigs == 0);
59 
60  IntClass = NewIntClass (1, 1);
61  AddIntClass (Templates->Templates, ClassId, IntClass);
62 
63  assert (Templates->Class[ClassId] == NULL);
64  Templates->Class[ClassId] = Class;
65 
66 } /* AddAdaptedClass */
67 
68 
69 /*---------------------------------------------------------------------------*/
81  assert (Config != NULL);
82 
83  destroy_nodes (Config->ContextsSeen, memfree);
84  FreeBitVector (Config->Protos);
85  free_struct (Config, sizeof (TEMP_CONFIG_STRUCT), "TEMP_CONFIG_STRUCT");
86 
87 } /* FreeTempConfig */
88 
89 /*---------------------------------------------------------------------------*/
90 void FreeTempProto(void *arg) {
91  PROTO proto = (PROTO) arg;
92 
93  free_struct (proto, sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT");
94 }
95 
97  assert(Config != NULL);
98  delete [] Config->Ambigs;
99  free_struct(Config, sizeof(PERM_CONFIG_STRUCT), "PERM_CONFIG_STRUCT");
100 }
101 
102 /*---------------------------------------------------------------------------*/
114  ADAPT_CLASS Class;
115  int i;
116 
117  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
118  Class->NumPermConfigs = 0;
119  Class->MaxNumTimesSeen = 0;
120  Class->TempProtos = NIL_LIST;
121 
126 
127  for (i = 0; i < MAX_NUM_CONFIGS; i++)
128  TempConfigFor (Class, i) = NULL;
129 
130  return (Class);
131 
132 } /* NewAdaptedClass */
133 
134 
135 /*-------------------------------------------------------------------------*/
136 void free_adapted_class(ADAPT_CLASS adapt_class) {
137  int i;
138 
139  for (i = 0; i < MAX_NUM_CONFIGS; i++) {
140  if (ConfigIsPermanent (adapt_class, i)
141  && PermConfigFor (adapt_class, i) != NULL)
142  FreePermConfig (PermConfigFor (adapt_class, i));
143  else if (!ConfigIsPermanent (adapt_class, i)
144  && TempConfigFor (adapt_class, i) != NULL)
145  FreeTempConfig (TempConfigFor (adapt_class, i));
146  }
147  FreeBitVector (adapt_class->PermProtos);
148  FreeBitVector (adapt_class->PermConfigs);
149  destroy_nodes (adapt_class->TempProtos, FreeTempProto);
150  Efree(adapt_class);
151 }
152 
153 
154 /*---------------------------------------------------------------------------*/
155 namespace tesseract {
168  ADAPT_TEMPLATES Templates;
169  int i;
170 
171  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
172 
173  Templates->Templates = NewIntTemplates ();
174  Templates->NumPermClasses = 0;
175  Templates->NumNonEmptyClasses = 0;
176 
177  /* Insert an empty class for each unichar id in unicharset */
178  for (i = 0; i < MAX_NUM_CLASSES; i++) {
179  Templates->Class[i] = NULL;
180  if (InitFromUnicharset && i < unicharset.size()) {
181  AddAdaptedClass(Templates, NewAdaptedClass(), i);
182  }
183  }
184 
185  return (Templates);
186 
187 } /* NewAdaptedTemplates */
188 
189 // Returns FontinfoId of the given config of the given adapted class.
191  return (ConfigIsPermanent(Class, ConfigId) ?
192  PermConfigFor(Class, ConfigId)->FontinfoId :
193  TempConfigFor(Class, ConfigId)->FontinfoId);
194 }
195 
196 } // namespace tesseract
197 
198 /*----------------------------------------------------------------------------*/
200 
201  if (templates != NULL) {
202  int i;
203  for (i = 0; i < (templates->Templates)->NumClasses; i++)
204  free_adapted_class (templates->Class[i]);
205  free_int_templates (templates->Templates);
206  Efree(templates);
207  }
208 }
209 
210 
211 /*---------------------------------------------------------------------------*/
223 TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId) {
225  int NumProtos = MaxProtoId + 1;
226 
227  Config =
229  "TEMP_CONFIG_STRUCT");
230  Config->Protos = NewBitVector (NumProtos);
231 
232  Config->NumTimesSeen = 1;
233  Config->MaxProtoId = MaxProtoId;
234  Config->ProtoVectorSize = WordsInVectorOfSize (NumProtos);
235  Config->ContextsSeen = NIL_LIST;
236  zero_all_bits (Config->Protos, Config->ProtoVectorSize);
237  Config->FontinfoId = FontinfoId;
238 
239  return (Config);
240 
241 } /* NewTempConfig */
242 
243 
244 /*---------------------------------------------------------------------------*/
255  return ((TEMP_PROTO)
256  alloc_struct (sizeof (TEMP_PROTO_STRUCT), "TEMP_PROTO_STRUCT"));
257 } /* NewTempProto */
258 
259 
260 /*---------------------------------------------------------------------------*/
261 namespace tesseract {
274  int i;
275  INT_CLASS IClass;
276  ADAPT_CLASS AClass;
277 
278  fprintf (File, "\n\nSUMMARY OF ADAPTED TEMPLATES:\n\n");
279  fprintf (File, "Num classes = %d; Num permanent classes = %d\n\n",
280  Templates->NumNonEmptyClasses, Templates->NumPermClasses);
281  fprintf (File, " Id NC NPC NP NPP\n");
282  fprintf (File, "------------------------\n");
283 
284  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
285  IClass = Templates->Templates->Class[i];
286  AClass = Templates->Class[i];
287  if (!IsEmptyAdaptedClass (AClass)) {
288  fprintf (File, "%5d %s %3d %3d %3d %3d\n",
290  IClass->NumConfigs, AClass->NumPermConfigs,
291  IClass->NumProtos,
292  IClass->NumProtos - count (AClass->TempProtos));
293  }
294  }
295  fprintf (File, "\n");
296 
297 } /* PrintAdaptedTemplates */
298 } // namespace tesseract
299 
300 
301 /*---------------------------------------------------------------------------*/
314  int32_t NumTempProtos;
315  int32_t NumConfigs;
316  int i;
317  ADAPT_CLASS Class;
318  TEMP_PROTO TempProto;
319 
320  /* first read high level adapted class structure */
321  Class = (ADAPT_CLASS) Emalloc (sizeof (ADAPT_CLASS_STRUCT));
322  fread ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
323 
324  /* then read in the definitions of the permanent protos and configs */
325  Class->PermProtos = NewBitVector (MAX_NUM_PROTOS);
326  Class->PermConfigs = NewBitVector (MAX_NUM_CONFIGS);
327  fread ((char *) Class->PermProtos, sizeof (uinT32),
329  fread ((char *) Class->PermConfigs, sizeof (uinT32),
331 
332  /* then read in the list of temporary protos */
333  fread (&NumTempProtos, sizeof(NumTempProtos), 1, File);
334  Class->TempProtos = NIL_LIST;
335  for (i = 0; i < NumTempProtos; i++) {
336  TempProto =
338  "TEMP_PROTO_STRUCT");
339  fread ((char *) TempProto, sizeof (TEMP_PROTO_STRUCT), 1, File);
340  Class->TempProtos = push_last (Class->TempProtos, TempProto);
341  }
342 
343  /* then read in the adapted configs */
344  fread (&NumConfigs, sizeof(NumConfigs), 1, File);
345  for (i = 0; i < NumConfigs; i++)
346  if (test_bit (Class->PermConfigs, i))
347  Class->Config[i].Perm = ReadPermConfig (File);
348  else
349  Class->Config[i].Temp = ReadTempConfig (File);
350 
351  return (Class);
352 
353 } /* ReadAdaptedClass */
354 
355 
356 /*---------------------------------------------------------------------------*/
357 namespace tesseract {
370  int i;
371  ADAPT_TEMPLATES Templates;
372 
373  /* first read the high level adaptive template struct */
374  Templates = (ADAPT_TEMPLATES) Emalloc (sizeof (ADAPT_TEMPLATES_STRUCT));
375  fread ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
376 
377  /* then read in the basic integer templates */
378  Templates->Templates = ReadIntTemplates (File);
379 
380  /* then read in the adaptive info for each class */
381  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
382  Templates->Class[i] = ReadAdaptedClass (File);
383  }
384  return (Templates);
385 
386 } /* ReadAdaptedTemplates */
387 } // namespace tesseract
388 
389 
390 /*---------------------------------------------------------------------------*/
404  "PERM_CONFIG_STRUCT");
405  uinT8 NumAmbigs;
406  fread ((char *) &NumAmbigs, sizeof(uinT8), 1, File);
407  Config->Ambigs = new UNICHAR_ID[NumAmbigs + 1];
408  fread(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
409  Config->Ambigs[NumAmbigs] = -1;
410  fread(&(Config->FontinfoId), sizeof(int), 1, File);
411 
412  return (Config);
413 
414 } /* ReadPermConfig */
415 
416 
417 /*---------------------------------------------------------------------------*/
431 
432  Config =
434  "TEMP_CONFIG_STRUCT");
435  fread ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
436 
437  Config->Protos = NewBitVector (Config->ProtoVectorSize * BITSINLONG);
438  fread ((char *) Config->Protos, sizeof (uinT32),
439  Config->ProtoVectorSize, File);
440 
441  return (Config);
442 
443 } /* ReadTempConfig */
444 
445 
446 /*---------------------------------------------------------------------------*/
459 void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs) {
460  int NumTempProtos;
461  LIST TempProtos;
462  int i;
463 
464  /* first write high level adapted class structure */
465  fwrite ((char *) Class, sizeof (ADAPT_CLASS_STRUCT), 1, File);
466 
467  /* then write out the definitions of the permanent protos and configs */
468  fwrite ((char *) Class->PermProtos, sizeof (uinT32),
470  fwrite ((char *) Class->PermConfigs, sizeof (uinT32),
472 
473  /* then write out the list of temporary protos */
474  NumTempProtos = count (Class->TempProtos);
475  fwrite ((char *) &NumTempProtos, sizeof (int), 1, File);
476  TempProtos = Class->TempProtos;
477  iterate (TempProtos) {
478  void* proto = first_node(TempProtos);
479  fwrite ((char *) proto, sizeof (TEMP_PROTO_STRUCT), 1, File);
480  }
481 
482  /* then write out the adapted configs */
483  fwrite ((char *) &NumConfigs, sizeof (int), 1, File);
484  for (i = 0; i < NumConfigs; i++)
485  if (test_bit (Class->PermConfigs, i))
486  WritePermConfig (File, Class->Config[i].Perm);
487  else
488  WriteTempConfig (File, Class->Config[i].Temp);
489 
490 } /* WriteAdaptedClass */
491 
492 
493 /*---------------------------------------------------------------------------*/
494 namespace tesseract {
506  int i;
507 
508  /* first write the high level adaptive template struct */
509  fwrite ((char *) Templates, sizeof (ADAPT_TEMPLATES_STRUCT), 1, File);
510 
511  /* then write out the basic integer templates */
512  WriteIntTemplates (File, Templates->Templates, unicharset);
513 
514  /* then write out the adaptive info for each class */
515  for (i = 0; i < (Templates->Templates)->NumClasses; i++) {
516  WriteAdaptedClass (File, Templates->Class[i],
517  Templates->Templates->Class[i]->NumConfigs);
518  }
519 } /* WriteAdaptedTemplates */
520 } // namespace tesseract
521 
522 
523 /*---------------------------------------------------------------------------*/
535 void WritePermConfig(FILE *File, PERM_CONFIG Config) {
536  uinT8 NumAmbigs = 0;
537 
538  assert (Config != NULL);
539  while (Config->Ambigs[NumAmbigs] > 0) ++NumAmbigs;
540 
541  fwrite((char *) &NumAmbigs, sizeof(uinT8), 1, File);
542  fwrite(Config->Ambigs, sizeof(UNICHAR_ID), NumAmbigs, File);
543  fwrite(&(Config->FontinfoId), sizeof(int), 1, File);
544 } /* WritePermConfig */
545 
546 
547 /*---------------------------------------------------------------------------*/
559 void WriteTempConfig(FILE *File, TEMP_CONFIG Config) {
560  assert (Config != NULL);
561  /* contexts not yet implemented */
562  assert (Config->ContextsSeen == NULL);
563 
564  fwrite ((char *) Config, sizeof (TEMP_CONFIG_STRUCT), 1, File);
565  fwrite ((char *) Config->Protos, sizeof (uinT32),
566  Config->ProtoVectorSize, File);
567 
568 } /* WriteTempConfig */
void free_adapted_templates(ADAPT_TEMPLATES templates)
Definition: adaptive.cpp:199
void AddIntClass(INT_TEMPLATES Templates, CLASS_ID ClassId, INT_CLASS Class)
Definition: intproto.cpp:238
INT_TEMPLATES ReadIntTemplates(FILE *File)
Definition: intproto.cpp:761
int count(LIST var_list)
Definition: oldlist.cpp:103
TEMP_CONFIG NewTempConfig(int MaxProtoId, int FontinfoId)
Definition: adaptive.cpp:223
uinT16 NumProtos
Definition: intproto.h:108
#define first_node(l)
Definition: oldlist.h:139
PROTO_STRUCT * PROTO
Definition: protos.h:52
#define TempConfigFor(Class, ConfigId)
Definition: adaptive.h:102
void free_struct(void *deadstruct, inT32, const char *)
Definition: memry.cpp:43
int GetFontinfoId(ADAPT_CLASS Class, uinT8 ConfigId)
Definition: adaptive.cpp:190
void AddAdaptedClass(ADAPT_TEMPLATES Templates, ADAPT_CLASS Class, CLASS_ID ClassId)
Definition: adaptive.cpp:49
int size() const
Definition: unicharset.h:297
#define NIL_LIST
Definition: oldlist.h:126
#define UnusedClassIdIn(T, c)
Definition: intproto.h:180
CLUSTERCONFIG Config
void free_adapted_class(ADAPT_CLASS adapt_class)
Definition: adaptive.cpp:136
ADAPT_CLASS NewAdaptedClass()
Definition: adaptive.cpp:113
void PrintAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:273
void FreeTempConfig(TEMP_CONFIG Config)
Definition: adaptive.cpp:80
void Efree(void *ptr)
Definition: emalloc.cpp:79
void memfree(void *element)
Definition: freelist.cpp:30
#define IsEmptyAdaptedClass(Class)
Definition: adaptive.h:90
unsigned char uinT8
Definition: host.h:32
BIT_VECTOR PermProtos
Definition: adaptive.h:68
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
PERM_CONFIG_STRUCT * PERM_CONFIG
Definition: adaptive.h:55
#define MAX_NUM_PROTOS
Definition: intproto.h:47
UNICHAR_ID CLASS_ID
Definition: matchdefs.h:35
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
ADAPT_CLASS ReadAdaptedClass(FILE *File)
Definition: adaptive.cpp:313
TEMP_PROTO_STRUCT * TEMP_PROTO
Definition: adaptive.h:37
TEMP_PROTO NewTempProto()
Definition: adaptive.cpp:254
ADAPT_TEMPLATES ReadAdaptedTemplates(FILE *File)
Definition: adaptive.cpp:369
#define PermConfigFor(Class, ConfigId)
Definition: adaptive.h:105
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:332
uinT8 NumPermConfigs
Definition: adaptive.h:65
#define BITSINLONG
Definition: bitvec.h:27
void WriteAdaptedClass(FILE *File, ADAPT_CLASS Class, int NumConfigs)
Definition: adaptive.cpp:459
void FreeBitVector(BIT_VECTOR BitVector)
Definition: bitvec.cpp:54
INT_CLASS NewIntClass(int MaxNumProtos, int MaxNumConfigs)
Definition: intproto.cpp:664
void free_int_templates(INT_TEMPLATES templates)
Definition: intproto.cpp:739
void FreePermConfig(PERM_CONFIG Config)
Definition: adaptive.cpp:96
INT_TEMPLATES Templates
Definition: adaptive.h:77
uinT8 MaxNumTimesSeen
Definition: adaptive.h:66
BIT_VECTOR PermConfigs
Definition: adaptive.h:69
void destroy_nodes(LIST list, void_dest destructor)
Definition: oldlist.cpp:199
void WritePermConfig(FILE *File, PERM_CONFIG Config)
Definition: adaptive.cpp:535
INT_TEMPLATES NewIntTemplates()
Definition: intproto.cpp:723
PERM_CONFIG ReadPermConfig(FILE *File)
Definition: adaptive.cpp:402
void WriteIntTemplates(FILE *File, INT_TEMPLATES Templates, const UNICHARSET &target_unicharset)
Definition: intproto.cpp:1129
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
void FreeTempProto(void *arg)
Definition: adaptive.cpp:90
#define iterate(l)
Definition: oldlist.h:159
TEMP_CONFIG ReadTempConfig(FILE *File)
Definition: adaptive.cpp:429
#define test_bit(array, bit)
Definition: bitvec.h:61
unsigned int uinT32
Definition: host.h:36
#define LegalClassId(c)
Definition: intproto.h:179
void * alloc_struct(inT32 count, const char *)
Definition: memry.cpp:39
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
void WriteTempConfig(FILE *File, TEMP_CONFIG Config)
Definition: adaptive.cpp:559
UNICHARSET unicharset
Definition: ccutil.h:70
TEMP_CONFIG_STRUCT * TEMP_CONFIG
Definition: adaptive.h:48
#define zero_all_bits(array, length)
Definition: bitvec.h:33
const char * id_to_unichar(UNICHAR_ID id) const
Definition: unicharset.cpp:266
uinT8 NumConfigs
Definition: intproto.h:110
ADAPT_CLASS_STRUCT * ADAPT_CLASS
Definition: adaptive.h:73
ADAPT_TEMPLATES_STRUCT * ADAPT_TEMPLATES
Definition: adaptive.h:83
ADAPT_TEMPLATES NewAdaptedTemplates(bool InitFromUnicharset)
Definition: adaptive.cpp:167
INT_CLASS Class[MAX_NUM_CLASSES]
Definition: intproto.h:124
#define ConfigIsPermanent(Class, ConfigId)
Definition: adaptive.h:93
void WriteAdaptedTemplates(FILE *File, ADAPT_TEMPLATES Templates)
Definition: adaptive.cpp:505
ADAPT_CLASS Class[MAX_NUM_CLASSES]
Definition: adaptive.h:81
int UNICHAR_ID
Definition: unichar.h:33