tesseract  3.05.02
clusttool.cpp File Reference
#include "clusttool.h"
#include "const.h"
#include "danerror.h"
#include "emalloc.h"
#include "scanutils.h"
#include <stdio.h>
#include <math.h>

Go to the source code of this file.

Macros

#define TOKENSIZE   80
 
#define MAXSAMPLESIZE   65535
 

Functions

uinT16 ReadSampleSize (FILE *File)
 
PARAM_DESC * ReadParamDesc (FILE *File, uinT16 N)
 
PROTOTYPE * ReadPrototype (FILE *File, uinT16 N)
 
PROTOSTYLE ReadProtoStyle (FILE *File)
 
FLOAT32 * ReadNFloats (FILE *File, uinT16 N, FLOAT32 Buffer[])
 
void WriteParamDesc (FILE *File, uinT16 N, const PARAM_DESC ParamDesc[])
 
void WritePrototype (FILE *File, uinT16 N, PROTOTYPE *Proto)
 
void WriteNFloats (FILE *File, uinT16 N, FLOAT32 Array[])
 
void WriteProtoStyle (FILE *File, PROTOSTYLE ProtoStyle)
 
void WriteProtoList (FILE *File, uinT16 N, PARAM_DESC ParamDesc[], LIST ProtoList, BOOL8 WriteSigProtos, BOOL8 WriteInsigProtos)
 

Macro Definition Documentation

◆ MAXSAMPLESIZE

#define MAXSAMPLESIZE   65535

Definition at line 30 of file clusttool.cpp.

◆ TOKENSIZE

#define TOKENSIZE   80

Definition at line 29 of file clusttool.cpp.

Function Documentation

◆ ReadNFloats()

FLOAT32* ReadNFloats ( FILE *  File,
uinT16  N,
FLOAT32  Buffer[] 
)

This routine reads N floats from the specified text file and places them into Buffer. If Buffer is NULL, a buffer is created and passed back to the caller. If EOF is encountered before any floats can be read, NULL is returned.

Parameters
File: open text file to read floats from
N: number of floats to read
Buffer: pointer to buffer to place floats into
Returns
Pointer to buffer holding floats or NULL if EOF
Note
Globals: None
Exceptions: ILLEGALFLOAT
History: 6/6/89, DSJ, Created.

Definition at line 282 of file clusttool.cpp.

282  {
283  bool needs_free = false;
284  int i;
285  int NumFloatsRead;
286 
287  if (Buffer == NULL) {
288  Buffer = reinterpret_cast<FLOAT32*>(Emalloc(N * sizeof(FLOAT32)));
289  needs_free = true;
290  }
291 
292  for (i = 0; i < N; i++) {
293  NumFloatsRead = tfscanf(File, "%f", &(Buffer[i]));
294  if (NumFloatsRead != 1) {
295  if ((NumFloatsRead == EOF) && (i == 0)) {
296  if (needs_free) {
297  Efree(Buffer);
298  }
299  return NULL;
300  } else {
301  DoError(ILLEGALFLOAT, "Illegal float specification");
302  }
303  }
304  }
305  return Buffer;
306 }
#define ILLEGALFLOAT
Definition: clusttool.h:62
void Efree(void *ptr)
Definition: emalloc.cpp:79
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
float FLOAT32
Definition: host.h:44
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
void * Emalloc(int Size)
Definition: emalloc.cpp:47

◆ ReadParamDesc()

PARAM_DESC* ReadParamDesc ( FILE *  File,
uinT16  N 
)

This routine reads textual descriptions of sets of parameters which describe the characteristics of feature dimensions.

Exceptions:

  • ILLEGALCIRCULARSPEC
  • ILLEGALESSENTIALSPEC
  • ILLEGALMINMAXSPEC

Parameters
File: open text file to read N parameter descriptions from
N: number of parameter descriptions to read
Returns
Pointer to an array of parameter descriptors.
Note
Globals: None
History: 6/6/89, DSJ, Created.

Definition at line 67 of file clusttool.cpp.

67  {
68  int i;
69  PARAM_DESC *ParamDesc;
70  char Token[TOKENSIZE];
71 
72  ParamDesc = (PARAM_DESC *) Emalloc (N * sizeof (PARAM_DESC));
73  for (i = 0; i < N; i++) {
74  if (tfscanf(File, "%s", Token) != 1)
75  DoError (ILLEGALCIRCULARSPEC,
76  "Illegal circular/linear specification");
77  if (Token[0] == 'c')
78  ParamDesc[i].Circular = TRUE;
79  else
80  ParamDesc[i].Circular = FALSE;
81 
82  if (tfscanf(File, "%s", Token) != 1)
83  DoError (ILLEGALESSENTIALSPEC,
84  "Illegal essential/non-essential spec");
85  if (Token[0] == 'e')
86  ParamDesc[i].NonEssential = FALSE;
87  else
88  ParamDesc[i].NonEssential = TRUE;
89  if (tfscanf(File, "%f%f", &(ParamDesc[i].Min), &(ParamDesc[i].Max)) != 2)
90  DoError (ILLEGALMINMAXSPEC, "Illegal min or max specification");
91  ParamDesc[i].Range = ParamDesc[i].Max - ParamDesc[i].Min;
92  ParamDesc[i].HalfRange = ParamDesc[i].Range / 2;
93  ParamDesc[i].MidRange = (ParamDesc[i].Max + ParamDesc[i].Min) / 2;
94  }
95  return (ParamDesc);
96 }
#define TRUE
Definition: capi.h:45
#define ILLEGALCIRCULARSPEC
Definition: clusttool.h:54
#define ILLEGALMINMAXSPEC
Definition: clusttool.h:55
FLOAT32 Range
Definition: ocrfeatures.h:51
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
#define ILLEGALESSENTIALSPEC
Definition: clusttool.h:63
#define FALSE
Definition: capi.h:46
FLOAT32 HalfRange
Definition: ocrfeatures.h:52
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
FLOAT32 MidRange
Definition: ocrfeatures.h:53
inT8 Circular
Definition: ocrfeatures.h:47
inT8 NonEssential
Definition: ocrfeatures.h:48
#define TOKENSIZE
Definition: clusttool.cpp:29
FLOAT32 Min
Definition: ocrfeatures.h:49
void * Emalloc(int Size)
Definition: emalloc.cpp:47
FLOAT32 Max
Definition: ocrfeatures.h:50

◆ ReadProtoStyle()

PROTOSTYLE ReadProtoStyle ( FILE *  File)

This routine reads a single token from the specified text file and interprets it as a prototype style specification.

Parameters
File: open text file to read prototype style from
Returns
Prototype style read from text file
Note
Globals: None
Exceptions: ILLEGALSTYLESPEC illegal prototype style specification
History: 6/8/89, DSJ, Created.

Definition at line 242 of file clusttool.cpp.

242  {
243  char Token[TOKENSIZE];
244  PROTOSTYLE Style;
245 
246  if (tfscanf(File, "%s", Token) != 1)
247  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
248  switch (Token[0]) {
249  case 's':
250  Style = spherical;
251  break;
252  case 'e':
253  Style = elliptical;
254  break;
255  case 'm':
256  Style = mixed;
257  break;
258  case 'a':
259  Style = automatic;
260  break;
261  default:
262  Style = elliptical;
263  DoError (ILLEGALSTYLESPEC, "Illegal prototype style specification");
264  }
265  return (Style);
266 }
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
Definition: cluster.h:45
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
#define TOKENSIZE
Definition: clusttool.cpp:29
#define ILLEGALSTYLESPEC
Definition: clusttool.h:57
PROTOSTYLE
Definition: cluster.h:44

◆ ReadPrototype()

PROTOTYPE* ReadPrototype ( FILE *  File,
uinT16  N 
)

This routine reads a textual description of a prototype from the specified file.

Exceptions:

  • ILLEGALSIGNIFICANCESPEC
  • ILLEGALSAMPLECOUNT
  • ILLEGALMEANSPEC
  • ILLEGALVARIANCESPEC
  • ILLEGALDISTRIBUTION

Parameters
File: open text file to read prototype from
N: number of dimensions used in prototype
Returns
Pointer to the prototype read from the file, or NULL if end of file is reached
Note
Globals: None
History: 6/6/89, DSJ, Created.

Definition at line 114 of file clusttool.cpp.

114  {
115  char Token[TOKENSIZE];
116  int Status;
117  PROTOTYPE *Proto;
118  int SampleCount;
119  int i;
120 
121  if ((Status = tfscanf(File, "%s", Token)) == 1) {
122  Proto = (PROTOTYPE *) Emalloc (sizeof (PROTOTYPE));
123  Proto->Cluster = NULL;
124  if (Token[0] == 's')
125  Proto->Significant = TRUE;
126  else
127  Proto->Significant = FALSE;
128 
129  Proto->Style = ReadProtoStyle (File);
130 
131  if ((tfscanf(File, "%d", &SampleCount) != 1) || (SampleCount < 0))
132  DoError (ILLEGALSAMPLECOUNT, "Illegal sample count");
133  Proto->NumSamples = SampleCount;
134 
135  Proto->Mean = ReadNFloats (File, N, NULL);
136  if (Proto->Mean == NULL)
137  DoError (ILLEGALMEANSPEC, "Illegal prototype mean");
138 
139  switch (Proto->Style) {
140  case spherical:
141  if (ReadNFloats (File, 1, &(Proto->Variance.Spherical)) == NULL)
142  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
143  Proto->Magnitude.Spherical =
144  1.0 / sqrt ((double) (2.0 * PI * Proto->Variance.Spherical));
145  Proto->TotalMagnitude =
146  pow (Proto->Magnitude.Spherical, (float) N);
147  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
148  Proto->Weight.Spherical = 1.0 / Proto->Variance.Spherical;
149  Proto->Distrib = NULL;
150  break;
151  case elliptical:
152  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
153  if (Proto->Variance.Elliptical == NULL)
154  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
155  Proto->Magnitude.Elliptical =
156  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
157  Proto->Weight.Elliptical =
158  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
159  Proto->TotalMagnitude = 1.0;
160  for (i = 0; i < N; i++) {
161  Proto->Magnitude.Elliptical[i] =
162  1.0 /
163  sqrt ((double) (2.0 * PI * Proto->Variance.Elliptical[i]));
164  Proto->Weight.Elliptical[i] =
165  1.0 / Proto->Variance.Elliptical[i];
166  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
167  }
168  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
169  Proto->Distrib = NULL;
170  break;
171  case mixed:
172  Proto->Distrib =
173  (DISTRIBUTION *) Emalloc (N * sizeof (DISTRIBUTION));
174  for (i = 0; i < N; i++) {
175  if (tfscanf(File, "%s", Token) != 1)
176  DoError (ILLEGALDISTRIBUTION,
177  "Illegal prototype distribution");
178  switch (Token[0]) {
179  case 'n':
180  Proto->Distrib[i] = normal;
181  break;
182  case 'u':
183  Proto->Distrib[i] = uniform;
184  break;
185  case 'r':
186  Proto->Distrib[i] = D_random;
187  break;
188  default:
189  DoError (ILLEGALDISTRIBUTION,
190  "Illegal prototype distribution");
191  }
192  }
193  Proto->Variance.Elliptical = ReadNFloats (File, N, NULL);
194  if (Proto->Variance.Elliptical == NULL)
195  DoError (ILLEGALVARIANCESPEC, "Illegal prototype variance");
196  Proto->Magnitude.Elliptical =
197  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
198  Proto->Weight.Elliptical =
199  (FLOAT32 *) Emalloc (N * sizeof (FLOAT32));
200  Proto->TotalMagnitude = 1.0;
201  for (i = 0; i < N; i++) {
202  switch (Proto->Distrib[i]) {
203  case normal:
204  Proto->Magnitude.Elliptical[i] = 1.0 /
205  sqrt ((double)
206  (2.0 * PI * Proto->Variance.Elliptical[i]));
207  Proto->Weight.Elliptical[i] =
208  1.0 / Proto->Variance.Elliptical[i];
209  break;
210  case uniform:
211  case D_random:
212  Proto->Magnitude.Elliptical[i] = 1.0 /
213  (2.0 * Proto->Variance.Elliptical[i]);
214  break;
215  case DISTRIBUTION_COUNT:
216  ASSERT_HOST(!"Distribution count not allowed!");
217  }
218  Proto->TotalMagnitude *= Proto->Magnitude.Elliptical[i];
219  }
220  Proto->LogMagnitude = log ((double) Proto->TotalMagnitude);
221  break;
222  }
223  return (Proto);
224  }
225  else if (Status == EOF)
226  return (NULL);
227  else {
228  DoError (ILLEGALSIGNIFICANCESPEC, "Illegal significance specification");
229  return (NULL);
230  }
231 }
CLUSTER * Cluster
Definition: cluster.h:76
#define TRUE
Definition: capi.h:45
#define ILLEGALDISTRIBUTION
Definition: clusttool.h:61
DISTRIBUTION
Definition: cluster.h:58
FLOAT32 TotalMagnitude
Definition: cluster.h:79
Definition: cluster.h:59
#define PI
Definition: const.h:19
FLOAT32 LogMagnitude
Definition: cluster.h:80
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
FLOATUNION Variance
Definition: cluster.h:81
unsigned Significant
Definition: cluster.h:68
#define ILLEGALSIGNIFICANCESPEC
Definition: clusttool.h:56
unsigned NumSamples
Definition: cluster.h:75
#define FALSE
Definition: capi.h:46
Definition: cluster.h:45
#define ILLEGALMEANSPEC
Definition: clusttool.h:59
float FLOAT32
Definition: host.h:44
DISTRIBUTION * Distrib
Definition: cluster.h:77
FLOAT32 Spherical
Definition: cluster.h:63
FLOAT32 * Mean
Definition: cluster.h:78
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
FLOATUNION Magnitude
Definition: cluster.h:82
#define TOKENSIZE
Definition: clusttool.cpp:29
FLOAT32 * ReadNFloats(FILE *File, uinT16 N, FLOAT32 Buffer[])
Definition: clusttool.cpp:282
unsigned Style
Definition: cluster.h:74
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define ILLEGALSAMPLECOUNT
Definition: clusttool.h:58
#define ILLEGALVARIANCESPEC
Definition: clusttool.h:60
PROTOSTYLE ReadProtoStyle(FILE *File)
Definition: clusttool.cpp:242
#define ASSERT_HOST(x)
Definition: errcode.h:84
FLOATUNION Weight
Definition: cluster.h:83
FLOAT32 * Elliptical
Definition: cluster.h:64

◆ ReadSampleSize()

uinT16 ReadSampleSize ( FILE *  File)

This routine reads a single integer from the specified file and checks to ensure that it is between 0 and MAXSAMPLESIZE.

Parameters
File: open text file to read sample size from
Returns
Sample size
Note
Globals: None
Exceptions: ILLEGALSAMPLESIZE illegal format or range
History: 6/6/89, DSJ, Created.

Definition at line 44 of file clusttool.cpp.

44  {
45  int SampleSize;
46 
47  if ((tfscanf(File, "%d", &SampleSize) != 1) ||
48  (SampleSize < 0) || (SampleSize > MAXSAMPLESIZE))
49  DoError (ILLEGALSAMPLESIZE, "Illegal sample size");
50  return (SampleSize);
51 }
#define ILLEGALSAMPLESIZE
Definition: clusttool.h:53
int tfscanf(FILE *stream, const char *format,...)
Definition: scanutils.cpp:228
void DoError(int Error, const char *Message)
Definition: danerror.cpp:42
#define MAXSAMPLESIZE
Definition: clusttool.cpp:30

◆ WriteNFloats()

void WriteNFloats ( FILE *  File,
uinT16  N,
FLOAT32  Array[] 
)

This routine writes a text representation of N floats from an array to a file. All of the floats are placed on one line.

Parameters
File: open text file to write N floats to
N: number of floats to write
Array: array of floats to write
Returns
None
Note
Globals: None
Exceptions: None
History: 6/6/89, DSJ, Created.

Definition at line 398 of file clusttool.cpp.

398  {
399  for (int i = 0; i < N; i++)
400  fprintf(File, " %9.6f", Array[i]);
401  fprintf(File, "\n");
402 }

◆ WriteParamDesc()

void WriteParamDesc ( FILE *  File,
uinT16  N,
const PARAM_DESC  ParamDesc[] 
)

This routine writes an array of dimension descriptors to the specified text file.

Parameters
File: open text file to write param descriptors to
N: number of param descriptors to write
ParamDesc: array of param descriptors to write
Returns
None
Note
Globals: None
Exceptions: None
History: 6/6/89, DSJ, Created.

Definition at line 319 of file clusttool.cpp.

319  {
320  int i;
321 
322  for (i = 0; i < N; i++) {
323  if (ParamDesc[i].Circular)
324  fprintf (File, "circular ");
325  else
326  fprintf (File, "linear ");
327 
328  if (ParamDesc[i].NonEssential)
329  fprintf (File, "non-essential ");
330  else
331  fprintf (File, "essential ");
332 
333  fprintf (File, "%10.6f %10.6f\n", ParamDesc[i].Min, ParamDesc[i].Max);
334  }
335 }

◆ WriteProtoList()

void WriteProtoList ( FILE *  File,
uinT16  N,
PARAM_DESC  ParamDesc[],
LIST  ProtoList,
BOOL8  WriteSigProtos,
BOOL8  WriteInsigProtos 
)

This routine writes a textual description of each prototype in the prototype list to the specified file. It also writes a file header which includes the number of dimensions in feature space and the descriptions for each dimension.

Parameters
File: open text file to write prototypes to
N: number of dimensions in feature space
ParamDesc: descriptions for each dimension
ProtoList: list of prototypes to be written
WriteSigProtos: TRUE to write out significant prototypes
WriteInsigProtos: TRUE to write out insignificant prototypes
Note
Globals: None
Returns
None
Note
Exceptions: None
History: 6/12/89, DSJ, Created.

Definition at line 449 of file clusttool.cpp.

451  {
452  PROTOTYPE *Proto;
453 
454  /* write file header */
455  fprintf(File,"%0d\n",N);
456  WriteParamDesc(File,N,ParamDesc);
457 
458  /* write prototypes */
459  iterate(ProtoList)
460  {
461  Proto = (PROTOTYPE *) first_node ( ProtoList );
462  if ((Proto->Significant && WriteSigProtos) ||
463  (!Proto->Significant && WriteInsigProtos))
464  WritePrototype(File, N, Proto);
465  }
466 }
#define first_node(l)
Definition: oldlist.h:139
unsigned Significant
Definition: cluster.h:68
#define iterate(l)
Definition: oldlist.h:159
void WriteParamDesc(FILE *File, uinT16 N, const PARAM_DESC ParamDesc[])
Definition: clusttool.cpp:319
void WritePrototype(FILE *File, uinT16 N, PROTOTYPE *Proto)
Definition: clusttool.cpp:348

◆ WriteProtoStyle()

void WriteProtoStyle ( FILE *  File,
PROTOSTYLE  ProtoStyle 
)

This routine writes to the specified text file a word which represents the ProtoStyle. It does not append a carriage return to the end.

Parameters
File: open text file to write prototype style to
ProtoStyle: prototype style to write
Returns
None
Note
Globals: None
Exceptions: None
History: 6/8/89, DSJ, Created.

Definition at line 415 of file clusttool.cpp.

415  {
416  switch (ProtoStyle) {
417  case spherical:
418  fprintf (File, "spherical");
419  break;
420  case elliptical:
421  fprintf (File, "elliptical");
422  break;
423  case mixed:
424  fprintf (File, "mixed");
425  break;
426  case automatic:
427  fprintf (File, "automatic");
428  break;
429  }
430 }
Definition: cluster.h:45

◆ WritePrototype()

void WritePrototype ( FILE *  File,
uinT16  N,
PROTOTYPE *  Proto 
)

This routine writes a textual description of a prototype to the specified text file.

Parameters
File: open text file to write prototype to
N: number of dimensions in feature space
Proto: prototype to write out
Returns
None
Note
Globals: None
Exceptions: None
History: 6/12/89, DSJ, Created.

Definition at line 348 of file clusttool.cpp.

348  {
349  int i;
350 
351  if (Proto->Significant)
352  fprintf (File, "significant ");
353  else
354  fprintf (File, "insignificant ");
355  WriteProtoStyle (File, (PROTOSTYLE) Proto->Style);
356  fprintf (File, "%6d\n\t", Proto->NumSamples);
357  WriteNFloats (File, N, Proto->Mean);
358  fprintf (File, "\t");
359 
360  switch (Proto->Style) {
361  case spherical:
362  WriteNFloats (File, 1, &(Proto->Variance.Spherical));
363  break;
364  case elliptical:
365  WriteNFloats (File, N, Proto->Variance.Elliptical);
366  break;
367  case mixed:
368  for (i = 0; i < N; i++)
369  switch (Proto->Distrib[i]) {
370  case normal:
371  fprintf (File, " %9s", "normal");
372  break;
373  case uniform:
374  fprintf (File, " %9s", "uniform");
375  break;
376  case D_random:
377  fprintf (File, " %9s", "random");
378  break;
379  case DISTRIBUTION_COUNT:
380  ASSERT_HOST(!"Distribution count not allowed!");
381  }
382  fprintf (File, "\n\t");
383  WriteNFloats (File, N, Proto->Variance.Elliptical);
384  }
385 }
Definition: cluster.h:59
FLOATUNION Variance
Definition: cluster.h:81
unsigned Significant
Definition: cluster.h:68
unsigned NumSamples
Definition: cluster.h:75
Definition: cluster.h:45
DISTRIBUTION * Distrib
Definition: cluster.h:77
void WriteNFloats(FILE *File, uinT16 N, FLOAT32 Array[])
Definition: clusttool.cpp:398
FLOAT32 Spherical
Definition: cluster.h:63
FLOAT32 * Mean
Definition: cluster.h:78
unsigned Style
Definition: cluster.h:74
void WriteProtoStyle(FILE *File, PROTOSTYLE ProtoStyle)
Definition: clusttool.cpp:415
#define ASSERT_HOST(x)
Definition: errcode.h:84
PROTOSTYLE
Definition: cluster.h:44
FLOAT32 * Elliptical
Definition: cluster.h:64