tesseract  3.05.02
commontraining.h File Reference
#include "cluster.h"
#include "commandlineflags.h"
#include "featdefs.h"
#include "intproto.h"
#include "oldlist.h"

Go to the source code of this file.

Classes

struct  LABELEDLISTNODE
 
struct  MERGE_CLASS_NODE
 

Namespaces

 tesseract
 

Typedefs

typedef struct LABELEDLISTNODE * LABELEDLIST
 
typedef MERGE_CLASS_NODE * MERGE_CLASS
 

Functions

void ParseArguments (int *argc, char ***argv)
 
ShapeTable * tesseract::LoadShapeTable (const STRING &file_prefix)
 
void tesseract::WriteShapeTable (const STRING &file_prefix, const ShapeTable &shape_table)
 
MasterTrainer * tesseract::LoadTrainingData (int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
 
const char * GetNextFilename (int argc, const char *const *argv)
 
LABELEDLIST FindList (LIST List, char *Label)
 
LABELEDLIST NewLabeledList (const char *Label)
 
void ReadTrainingSamples (const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
 
void WriteTrainingSamples (const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type)
 
void FreeTrainingSamples (LIST CharList)
 
void FreeLabeledList (LABELEDLIST LabeledList)
 
void FreeLabeledClassList (LIST ClassListList)
 
CLUSTERER * SetUpForClustering (const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type)
 
LIST RemoveInsignificantProtos (LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N)
 
void CleanUpUnusedData (LIST ProtoList)
 
void MergeInsignificantProtos (LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
 
MERGE_CLASS FindClass (LIST List, const char *Label)
 
MERGE_CLASS NewLabeledClass (const char *Label)
 
CLASS_STRUCT * SetUpForFloat2Int (const UNICHARSET &unicharset, LIST LabeledClassList)
 
void Normalize (float *Values)
 
void FreeNormProtoList (LIST CharList)
 
void AddToNormProtosList (LIST *NormProtoList, LIST ProtoList, char *CharName)
 
int NumberOfProtos (LIST ProtoList, BOOL8 CountSigProtos, BOOL8 CountInsigProtos)
 
void allocNormProtos ()
 

Variables

FEATURE_DEFS_STRUCT feature_defs
 
CLUSTERCONFIG Config
 

Typedef Documentation

◆ LABELEDLIST

typedef struct LABELEDLISTNODE * LABELEDLIST

◆ MERGE_CLASS

typedef MERGE_CLASS_NODE * MERGE_CLASS

Definition at line 56 of file commontraining.h.

Function Documentation

◆ AddToNormProtosList()

void AddToNormProtosList ( LIST * NormProtoList,
LIST  ProtoList,
char *  CharName 
)

Definition at line 840 of file commontraining.cpp.

844 {
845  PROTOTYPE* Proto;
846  LABELEDLIST LabeledProtoList;
847 
848  LabeledProtoList = NewLabeledList(CharName);
849  iterate(ProtoList)
850  {
851  Proto = (PROTOTYPE *) first_node (ProtoList);
852  LabeledProtoList->List = push(LabeledProtoList->List, Proto);
853  }
854  *NormProtoList = push(*NormProtoList, LabeledProtoList);
855 }
#define first_node(l)
Definition: oldlist.h:139
LABELEDLIST NewLabeledList(const char *Label)
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
#define iterate(l)
Definition: oldlist.h:159

◆ allocNormProtos()

void allocNormProtos ( )

◆ CleanUpUnusedData()

void CleanUpUnusedData ( LIST  ProtoList)

Definition at line 598 of file commontraining.cpp.

600 {
601  PROTOTYPE* Prototype;
602 
603  iterate(ProtoList)
604  {
605  Prototype = (PROTOTYPE *) first_node (ProtoList);
606  if(Prototype->Variance.Elliptical != NULL)
607  {
608  memfree(Prototype->Variance.Elliptical);
609  Prototype->Variance.Elliptical = NULL;
610  }
611  if(Prototype->Magnitude.Elliptical != NULL)
612  {
613  memfree(Prototype->Magnitude.Elliptical);
614  Prototype->Magnitude.Elliptical = NULL;
615  }
616  if(Prototype->Weight.Elliptical != NULL)
617  {
618  memfree(Prototype->Weight.Elliptical);
619  Prototype->Weight.Elliptical = NULL;
620  }
621  }
622 }
#define first_node(l)
Definition: oldlist.h:139
void memfree(void *element)
Definition: freelist.cpp:30
FLOATUNION Variance
Definition: cluster.h:81
FLOATUNION Magnitude
Definition: cluster.h:82
#define iterate(l)
Definition: oldlist.h:159
FLOATUNION Weight
Definition: cluster.h:83
FLOAT32 * Elliptical
Definition: cluster.h:64

◆ FindClass()

MERGE_CLASS FindClass ( LIST  List,
const char *  Label 
)

Definition at line 693 of file commontraining.cpp.

693  {
694  MERGE_CLASS MergeClass;
695 
696  iterate (List)
697  {
698  MergeClass = (MERGE_CLASS) first_node (List);
699  if (strcmp (MergeClass->Label, Label) == 0)
700  return (MergeClass);
701  }
702  return (NULL);
703 
704 } /* FindClass */
#define first_node(l)
Definition: oldlist.h:139
MERGE_CLASS_NODE * MERGE_CLASS
#define iterate(l)
Definition: oldlist.h:159

◆ FindList()

LABELEDLIST FindList ( LIST  List,
char *  Label 
)

This routine searches through a list of labeled lists to find a list with the specified label. If a matching labeled list cannot be found, NULL is returned.

Parameters
List: list to search
Label: label to search for
Returns
Labeled list with the specified Label or NULL.
Note
Globals: none
Exceptions: none
History: Fri Aug 18 15:57:41 1989, DSJ, Created.

Definition at line 328 of file commontraining.cpp.

328  {
329  LABELEDLIST LabeledList;
330 
331  iterate (List)
332  {
333  LabeledList = (LABELEDLIST) first_node (List);
334  if (strcmp (LabeledList->Label, Label) == 0)
335  return (LabeledList);
336  }
337  return (NULL);
338 
339 } /* FindList */
#define first_node(l)
Definition: oldlist.h:139
struct LABELEDLISTNODE * LABELEDLIST
#define iterate(l)
Definition: oldlist.h:159

◆ FreeLabeledClassList()

void FreeLabeledClassList ( LIST  ClassList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
ClassList: list of all fonts in document
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 728 of file commontraining.cpp.

728  {
729  MERGE_CLASS MergeClass;
730 
731  LIST nodes = ClassList;
732  iterate(ClassList) /* iterate through all of the fonts */
733  {
734  MergeClass = (MERGE_CLASS) first_node (ClassList);
735  free (MergeClass->Label);
736  FreeClass(MergeClass->Class);
737  delete MergeClass;
738  }
739  destroy(nodes);
740 
741 } /* FreeLabeledClassList */
#define first_node(l)
Definition: oldlist.h:139
MERGE_CLASS_NODE * MERGE_CLASS
void FreeClass(CLASS_TYPE Class)
Definition: protos.cpp:215
LIST destroy(LIST list)
Definition: oldlist.cpp:182
#define iterate(l)
Definition: oldlist.h:159
CLASS_TYPE Class

◆ FreeLabeledList()

void FreeLabeledList ( LABELEDLIST  LabeledList)

This routine deallocates all of the memory consumed by a labeled list. It does not free any memory which may be consumed by the items in the list.

Parameters
LabeledList: labeled list to be freed
Note
Globals: none
Returns
none
Note
Exceptions: none
History: Fri Aug 18 17:52:45 1989, DSJ, Created.

Definition at line 479 of file commontraining.cpp.

479  {
480  destroy(LabeledList->List);
481  free(LabeledList->Label);
482  free(LabeledList);
483 } /* FreeLabeledList */
LIST destroy(LIST list)
Definition: oldlist.cpp:182

◆ FreeNormProtoList()

void FreeNormProtoList ( LIST  CharList)

Definition at line 824 of file commontraining.cpp.

826 {
827  LABELEDLIST char_sample;
828 
829  LIST nodes = CharList;
830  iterate(CharList) /* iterate through all of the fonts */
831  {
832  char_sample = (LABELEDLIST) first_node (CharList);
833  FreeLabeledList (char_sample);
834  }
835  destroy(nodes);
836 
837 } // FreeNormProtoList
void FreeLabeledList(LABELEDLIST LabeledList)
#define first_node(l)
Definition: oldlist.h:139
LIST destroy(LIST list)
Definition: oldlist.cpp:182
struct LABELEDLISTNODE * LABELEDLIST
#define iterate(l)
Definition: oldlist.h:159

◆ FreeTrainingSamples()

void FreeTrainingSamples ( LIST  CharList)

This routine deallocates all of the space allocated to the specified list of training samples.

Parameters
CharList: list of all fonts in document
Returns
none
Note
Globals: none
Exceptions: none
History: Fri Aug 18 17:44:27 1989, DSJ, Created.

Definition at line 450 of file commontraining.cpp.

450  {
451  LABELEDLIST char_sample;
452  FEATURE_SET FeatureSet;
453  LIST FeatureList;
454 
455  LIST nodes = CharList;
456  iterate(CharList) { /* iterate through all of the fonts */
457  char_sample = (LABELEDLIST) first_node(CharList);
458  FeatureList = char_sample->List;
459  iterate(FeatureList) { /* iterate through all of the classes */
460  FeatureSet = (FEATURE_SET) first_node(FeatureList);
461  FreeFeatureSet(FeatureSet);
462  }
463  FreeLabeledList(char_sample);
464  }
465  destroy(nodes);
466 } /* FreeTrainingSamples */
void FreeLabeledList(LABELEDLIST LabeledList)
#define first_node(l)
Definition: oldlist.h:139
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:74
LIST destroy(LIST list)
Definition: oldlist.cpp:182
struct LABELEDLISTNODE * LABELEDLIST
#define iterate(l)
Definition: oldlist.h:159
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:77

◆ GetNextFilename()

const char* GetNextFilename ( int  argc,
const char *const *  argv 
)

This routine returns the next command line argument. If there are no remaining command line arguments, it returns NULL. This routine should only be called after all option arguments have been parsed and removed with ParseArguments.

Globals:

  • tessoptind defined by tessopt sys call
    Returns
    Next command line argument or NULL.
    Note
    Exceptions: none
    History: Fri Aug 18 09:34:12 1989, DSJ, Created.

Definition at line 309 of file commontraining.cpp.

309  {
310  if (tessoptind < argc)
311  return argv[tessoptind++];
312  else
313  return NULL;
314 } /* GetNextFilename */
int tessoptind
Definition: tessopt.cpp:24

◆ MergeInsignificantProtos()

void MergeInsignificantProtos ( LIST  ProtoList,
const char *  label,
CLUSTERER * Clusterer,
CLUSTERCONFIG * Config 
)

Definition at line 533 of file commontraining.cpp.

534  {
535  PROTOTYPE* Prototype;
536  bool debug = strcmp(FLAGS_test_ch.c_str(), label) == 0;
537 
538  LIST pProtoList = ProtoList;
539  iterate(pProtoList) {
540  Prototype = (PROTOTYPE *) first_node (pProtoList);
541  if (Prototype->Significant || Prototype->Merged)
542  continue;
543  FLOAT32 best_dist = 0.125;
544  PROTOTYPE* best_match = NULL;
545  // Find the nearest alive prototype.
546  LIST list_it = ProtoList;
547  iterate(list_it) {
548  PROTOTYPE* test_p = (PROTOTYPE *) first_node (list_it);
549  if (test_p != Prototype && !test_p->Merged) {
550  FLOAT32 dist = ComputeDistance(Clusterer->SampleSize,
551  Clusterer->ParamDesc,
552  Prototype->Mean, test_p->Mean);
553  if (dist < best_dist) {
554  best_match = test_p;
555  best_dist = dist;
556  }
557  }
558  }
559  if (best_match != NULL && !best_match->Significant) {
560  if (debug)
561  tprintf("Merging red clusters (%d+%d) at %g,%g and %g,%g\n",
562  best_match->NumSamples, Prototype->NumSamples,
563  best_match->Mean[0], best_match->Mean[1],
564  Prototype->Mean[0], Prototype->Mean[1]);
565  best_match->NumSamples = MergeClusters(Clusterer->SampleSize,
566  Clusterer->ParamDesc,
567  best_match->NumSamples,
568  Prototype->NumSamples,
569  best_match->Mean,
570  best_match->Mean, Prototype->Mean);
571  Prototype->NumSamples = 0;
572  Prototype->Merged = 1;
573  } else if (best_match != NULL) {
574  if (debug)
575  tprintf("Red proto at %g,%g matched a green one at %g,%g\n",
576  Prototype->Mean[0], Prototype->Mean[1],
577  best_match->Mean[0], best_match->Mean[1]);
578  Prototype->Merged = 1;
579  }
580  }
581  // Mark significant those that now have enough samples.
582  int min_samples = (inT32) (Config->MinSamples * Clusterer->NumChar);
583  pProtoList = ProtoList;
584  iterate(pProtoList) {
585  Prototype = (PROTOTYPE *) first_node (pProtoList);
586  // Process insignificant protos that do not match a green one
587  if (!Prototype->Significant && Prototype->NumSamples >= min_samples &&
588  !Prototype->Merged) {
589  if (debug)
590  tprintf("Red proto at %g,%g becoming green\n",
591  Prototype->Mean[0], Prototype->Mean[1]);
592  Prototype->Significant = true;
593  }
594  }
595 } /* MergeInsignificantProtos */
FLOAT32 ComputeDistance(int k, PARAM_DESC *dim, FLOAT32 p1[], FLOAT32 p2[])
Definition: kdtree.cpp:472
#define first_node(l)
Definition: oldlist.h:139
PARAM_DESC * ParamDesc
Definition: cluster.h:88
unsigned Merged
Definition: cluster.h:69
CLUSTERCONFIG Config
unsigned Significant
Definition: cluster.h:68
unsigned NumSamples
Definition: cluster.h:75
FLOAT32 MinSamples
Definition: cluster.h:50
float FLOAT32
Definition: host.h:44
FLOAT32 * Mean
Definition: cluster.h:78
int inT32
Definition: host.h:35
#define tprintf(...)
Definition: tprintf.h:31
#define iterate(l)
Definition: oldlist.h:159
inT32 NumChar
Definition: cluster.h:93
inT16 SampleSize
Definition: cluster.h:87
inT32 MergeClusters(inT16 N, register PARAM_DESC ParamDesc[], register inT32 n1, register inT32 n2, register FLOAT32 m[], register FLOAT32 m1[], register FLOAT32 m2[])

◆ NewLabeledClass()

MERGE_CLASS NewLabeledClass ( const char *  Label)

Definition at line 707 of file commontraining.cpp.

707  {
708  MERGE_CLASS MergeClass;
709 
710  MergeClass = new MERGE_CLASS_NODE;
711  MergeClass->Label = (char*)Emalloc (strlen (Label)+1);
712  strcpy (MergeClass->Label, Label);
713  MergeClass->Class = NewClass (MAX_NUM_PROTOS, MAX_NUM_CONFIGS);
714  return (MergeClass);
715 
716 } /* NewLabeledClass */
CLASS_TYPE NewClass(int NumProtos, int NumConfigs)
Definition: protos.cpp:248
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void * Emalloc(int Size)
Definition: emalloc.cpp:47
#define MAX_NUM_CONFIGS
Definition: intproto.h:46
CLASS_TYPE Class

◆ NewLabeledList()

LABELEDLIST NewLabeledList ( const char *  Label)

This routine allocates a new, empty labeled list and gives it the specified label.

Parameters
Label: label for new list
Returns
New, empty labeled list.
Note
Globals: none
Exceptions: none
History: Fri Aug 18 16:08:46 1989, DSJ, Created.

Definition at line 351 of file commontraining.cpp.

351  {
352  LABELEDLIST LabeledList;
353 
354  LabeledList = (LABELEDLIST) Emalloc (sizeof (LABELEDLISTNODE));
355  LabeledList->Label = (char*)Emalloc (strlen (Label)+1);
356  strcpy (LabeledList->Label, Label);
357  LabeledList->List = NIL_LIST;
358  LabeledList->SampleCount = 0;
359  LabeledList->font_sample_count = 0;
360  return (LabeledList);
361 
362 } /* NewLabeledList */
#define NIL_LIST
Definition: oldlist.h:126
struct LABELEDLISTNODE * LABELEDLIST
void * Emalloc(int Size)
Definition: emalloc.cpp:47

◆ Normalize()

void Normalize ( float *  Values)

Definition at line 807 of file commontraining.cpp.

809 {
810  float Slope;
811  float Intercept;
812  float Normalizer;
813 
814  Slope = tan (Values [2] * 2 * PI);
815  Intercept = Values [1] - Slope * Values [0];
816  Normalizer = 1 / sqrt (Slope * Slope + 1.0);
817 
818  Values [0] = Slope * Normalizer;
819  Values [1] = - Normalizer;
820  Values [2] = Intercept * Normalizer;
821 } // Normalize
#define PI
Definition: const.h:19

◆ NumberOfProtos()

int NumberOfProtos ( LIST  ProtoList,
BOOL8  CountSigProtos,
BOOL8  CountInsigProtos 
)

Definition at line 858 of file commontraining.cpp.

859  {
860  int N = 0;
861  PROTOTYPE* Proto;
862 
863  iterate(ProtoList)
864  {
865  Proto = (PROTOTYPE *) first_node ( ProtoList );
866  if ((Proto->Significant && CountSigProtos) ||
867  (!Proto->Significant && CountInsigProtos))
868  N++;
869  }
870  return(N);
871 }
#define first_node(l)
Definition: oldlist.h:139
unsigned Significant
Definition: cluster.h:68
#define iterate(l)
Definition: oldlist.h:159

◆ ParseArguments()

void ParseArguments ( int *  argc,
char ***  argv 
)

This routine parses the command line arguments that were passed to the program and uses them to set relevant training-related global parameters.

Globals:

  • Config current clustering parameters
    Parameters
    argc: number of command line arguments to parse
    argv: command line arguments
    Returns
    none
    Note
    Exceptions: Illegal options terminate the program.

Definition at line 87 of file commontraining.cpp.

87  {
88  STRING usage;
89  if (*argc) {
90  usage += (*argv)[0];
91  }
92  usage += " [.tr files ...]";
93  tesseract::ParseCommandLineFlags(usage.c_str(), argc, argv, true);
94  // Record the index of the first non-flag argument to 1, since we set
95  // remove_flags to true when parsing the flags.
96  tessoptind = 1;
97  // Set some global values based on the flags.
98  Config.MinSamples =
99  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_min_samples_fraction)));
100  Config.MaxIllegal =
101  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_max_illegal)));
102  Config.Independence =
103  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_independence)));
104  Config.Confidence =
105  MAX(0.0, MIN(1.0, double(FLAGS_clusterconfig_confidence)));
106  // Set additional parameters from config file if specified.
107  if (!FLAGS_configfile.empty()) {
108  tesseract::ParamUtils::ReadParamsFile(
109  FLAGS_configfile.c_str(),
110  tesseract::SET_PARAM_CONSTRAINT_NON_INIT_ONLY,
111  ccutil.params());
112  }
113 }
FLOAT64 Confidence
Definition: cluster.h:54
CLUSTERCONFIG Config
#define MIN(x, y)
Definition: ndminx.h:28
FLOAT32 Independence
Definition: cluster.h:53
FLOAT32 MinSamples
Definition: cluster.h:50
const char * c_str() const
Definition: strngs.cpp:212
#define MAX(x, y)
Definition: ndminx.h:24
ParamsVectors * params()
Definition: ccutil.h:63
Definition: strngs.h:44
static bool TESS_API ReadParamsFile(const char *file, SetParamConstraint constraint, ParamsVectors *member_params)
Definition: params.cpp:40
void ParseCommandLineFlags(const char *usage, int *argc, char ***argv, const bool remove_flags)
CCUtil ccutil
int tessoptind
Definition: tessopt.cpp:24
FLOAT32 MaxIllegal
Definition: cluster.h:51

◆ ReadTrainingSamples()

void ReadTrainingSamples ( const FEATURE_DEFS_STRUCT & feature_defs,
const char *  feature_name,
int  max_samples,
UNICHARSET * unicharset,
FILE *  file,
LIST * training_samples 
)

This routine reads training samples from a file and places them into a data structure which organizes the samples by FontName and CharName. It then returns this data structure.

Parameters
file: open text file to read samples from
feature_defs
feature_name
max_samples
unicharset
training_samples
Returns
none
Note
Globals: none
Exceptions: none
History:
  • Fri Aug 18 13:11:39 1989, DSJ, Created.
  • Tue May 17 1998 simplifications to structure, eliminated font, and feature specification levels of structure.

Definition at line 386 of file commontraining.cpp.

389  {
390  char buffer[2048];
391  char unichar[UNICHAR_LEN + 1];
392  LABELEDLIST char_sample;
393  FEATURE_SET feature_samples;
394  CHAR_DESC char_desc;
395  int i;
396  int feature_type = ShortNameToFeatureType(feature_defs, feature_name);
397  // Zero out the font_sample_count for all the classes.
398  LIST it = *training_samples;
399  iterate(it) {
400  char_sample = reinterpret_cast<LABELEDLIST>(first_node(it));
401  char_sample->font_sample_count = 0;
402  }
403 
404  while (fgets(buffer, 2048, file) != NULL) {
405  if (buffer[0] == '\n')
406  continue;
407 
408  sscanf(buffer, "%*s %s", unichar);
409  if (unicharset != NULL && !unicharset->contains_unichar(unichar)) {
410  unicharset->unichar_insert(unichar);
411  if (unicharset->size() > MAX_NUM_CLASSES) {
412  tprintf("Error: Size of unicharset in training is "
413  "greater than MAX_NUM_CLASSES\n");
414  exit(1);
415  }
416  }
417  char_sample = FindList(*training_samples, unichar);
418  if (char_sample == NULL) {
419  char_sample = NewLabeledList(unichar);
420  *training_samples = push(*training_samples, char_sample);
421  }
422  char_desc = ReadCharDescription(feature_defs, file);
423  feature_samples = char_desc->FeatureSets[feature_type];
424  if (char_sample->font_sample_count < max_samples || max_samples <= 0) {
425  char_sample->List = push(char_sample->List, feature_samples);
426  char_sample->SampleCount++;
427  char_sample->font_sample_count++;
428  } else {
429  FreeFeatureSet(feature_samples);
430  }
431  for (i = 0; i < char_desc->NumFeatureSets; i++) {
432  if (feature_type != i)
433  FreeFeatureSet(char_desc->FeatureSets[i]);
434  }
435  free(char_desc);
436  }
437 } // ReadTrainingSamples
FEATURE_DEFS_STRUCT feature_defs
#define first_node(l)
Definition: oldlist.h:139
int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:302
FEATURE_SET FeatureSets[NUM_FEATURE_TYPES]
Definition: featdefs.h:44
bool TESS_API contains_unichar(const char *const unichar_repr) const
Definition: unicharset.cpp:644
int size() const
Definition: unicharset.h:297
LABELEDLIST FindList(LIST List, char *Label)
LABELEDLIST NewLabeledList(const char *Label)
uinT32 NumFeatureSets
Definition: featdefs.h:43
#define UNICHAR_LEN
Definition: unichar.h:30
void TESS_API unichar_insert(const char *const unichar_repr)
Definition: unicharset.cpp:612
LIST push(LIST list, void *element)
Definition: oldlist.cpp:317
#define tprintf(...)
Definition: tprintf.h:31
#define MAX_NUM_CLASSES
Definition: matchdefs.h:31
#define iterate(l)
Definition: oldlist.h:159
void FreeFeatureSet(FEATURE_SET FeatureSet)
Definition: ocrfeatures.cpp:77
CHAR_DESC ReadCharDescription(const FEATURE_DEFS_STRUCT &FeatureDefs, FILE *File)
Definition: featdefs.cpp:263

◆ RemoveInsignificantProtos()

LIST RemoveInsignificantProtos ( LIST  ProtoList,
BOOL8  KeepSigProtos,
BOOL8  KeepInsigProtos,
int  N 
)

Definition at line 625 of file commontraining.cpp.

631 {
632  LIST NewProtoList = NIL_LIST;
633  LIST pProtoList;
634  PROTOTYPE* Proto;
635  PROTOTYPE* NewProto;
636  int i;
637 
638  pProtoList = ProtoList;
639  iterate(pProtoList)
640  {
641  Proto = (PROTOTYPE *) first_node (pProtoList);
642  if ((Proto->Significant && KeepSigProtos) ||
643  (!Proto->Significant && KeepInsigProtos))
644  {
645  NewProto = (PROTOTYPE *)Emalloc(sizeof(PROTOTYPE));
646 
647  NewProto->Mean = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
648  NewProto->Significant = Proto->Significant;
649  NewProto->Style = Proto->Style;
650  NewProto->NumSamples = Proto->NumSamples;
651  NewProto->Cluster = NULL;
652  NewProto->Distrib = NULL;
653 
654  for (i=0; i < N; i++)
655  NewProto->Mean[i] = Proto->Mean[i];
656  if (Proto->Variance.Elliptical != NULL)
657  {
658  NewProto->Variance.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
659  for (i=0; i < N; i++)
660  NewProto->Variance.Elliptical[i] = Proto->Variance.Elliptical[i];
661  }
662  else
663  NewProto->Variance.Elliptical = NULL;
664  //---------------------------------------------
665  if (Proto->Magnitude.Elliptical != NULL)
666  {
667  NewProto->Magnitude.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
668  for (i=0; i < N; i++)
669  NewProto->Magnitude.Elliptical[i] = Proto->Magnitude.Elliptical[i];
670  }
671  else
672  NewProto->Magnitude.Elliptical = NULL;
673  //------------------------------------------------
674  if (Proto->Weight.Elliptical != NULL)
675  {
676  NewProto->Weight.Elliptical = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
677  for (i=0; i < N; i++)
678  NewProto->Weight.Elliptical[i] = Proto->Weight.Elliptical[i];
679  }
680  else
681  NewProto->Weight.Elliptical = NULL;
682 
683  NewProto->TotalMagnitude = Proto->TotalMagnitude;
684  NewProto->LogMagnitude = Proto->LogMagnitude;
685  NewProtoList = push_last(NewProtoList, NewProto);
686  }
687  }
688  FreeProtoList(&ProtoList);
689  return (NewProtoList);
690 } /* RemoveInsignificantProtos */
CLUSTER * Cluster
Definition: cluster.h:76
#define first_node(l)
Definition: oldlist.h:139
FLOAT32 TotalMagnitude
Definition: cluster.h:79
#define NIL_LIST
Definition: oldlist.h:126
FLOAT32 LogMagnitude
Definition: cluster.h:80
FLOATUNION Variance
Definition: cluster.h:81
unsigned Significant
Definition: cluster.h:68
LIST push_last(LIST list, void *item)
Definition: oldlist.cpp:332
unsigned NumSamples
Definition: cluster.h:75
float FLOAT32
Definition: host.h:44
DISTRIBUTION * Distrib
Definition: cluster.h:77
FLOAT32 * Mean
Definition: cluster.h:78
FLOATUNION Magnitude
Definition: cluster.h:82
#define iterate(l)
Definition: oldlist.h:159
unsigned Style
Definition: cluster.h:74
void * Emalloc(int Size)
Definition: emalloc.cpp:47
FLOATUNION Weight
Definition: cluster.h:83
void FreeProtoList(LIST *ProtoList)
Definition: cluster.cpp:574
FLOAT32 * Elliptical
Definition: cluster.h:64

◆ SetUpForClustering()

CLUSTERER* SetUpForClustering ( const FEATURE_DEFS_STRUCT & FeatureDefs,
LABELEDLIST  char_sample,
const char *  program_feature_type 
)

This routine reads samples from a LABELEDLIST and enters those samples into a clusterer data structure. This data structure is then returned to the caller.

Parameters
char_sample: LABELEDLIST that holds all the feature information for a given character.
FeatureDefs
program_feature_type
Returns
Pointer to new clusterer data structure.
Note
Globals: None
Exceptions: None
History: 8/16/89, DSJ, Created.

Definition at line 499 of file commontraining.cpp.

501  {
502  uinT16 N;
503  int i, j;
504  FLOAT32 *Sample = NULL;
505  CLUSTERER *Clusterer;
506  inT32 CharID;
507  LIST FeatureList = NULL;
508  FEATURE_SET FeatureSet = NULL;
509 
510  int desc_index = ShortNameToFeatureType(FeatureDefs, program_feature_type);
511  N = FeatureDefs.FeatureDesc[desc_index]->NumParams;
512  Clusterer = MakeClusterer(N, FeatureDefs.FeatureDesc[desc_index]->ParamDesc);
513 
514  FeatureList = char_sample->List;
515  CharID = 0;
516  iterate(FeatureList) {
517  FeatureSet = (FEATURE_SET) first_node(FeatureList);
518  for (i = 0; i < FeatureSet->MaxNumFeatures; i++) {
519  if (Sample == NULL)
520  Sample = (FLOAT32 *)Emalloc(N * sizeof(FLOAT32));
521  for (j = 0; j < N; j++)
522  Sample[j] = FeatureSet->Features[i]->Params[j];
523  MakeSample (Clusterer, Sample, CharID);
524  }
525  CharID++;
526  }
527  free(Sample);
528  return Clusterer;
529 
530 } /* SetUpForClustering */
SAMPLE * MakeSample(CLUSTERER *Clusterer, const FLOAT32 *Feature, inT32 CharID)
Definition: cluster.cpp:456
#define first_node(l)
Definition: oldlist.h:139
int ShortNameToFeatureType(const FEATURE_DEFS_STRUCT &FeatureDefs, const char *ShortName)
Definition: featdefs.cpp:302
CLUSTERER * MakeClusterer(inT16 SampleSize, const PARAM_DESC ParamDesc[])
Definition: cluster.cpp:400
const FEATURE_DESC_STRUCT * FeatureDesc[NUM_FEATURE_TYPES]
Definition: featdefs.h:50
FLOAT32 Params[1]
Definition: ocrfeatures.h:65
FEATURE_SET_STRUCT * FEATURE_SET
Definition: ocrfeatures.h:74
unsigned short uinT16
Definition: host.h:34
const PARAM_DESC * ParamDesc
Definition: ocrfeatures.h:59
float FLOAT32
Definition: host.h:44
int inT32
Definition: host.h:35
#define iterate(l)
Definition: oldlist.h:159
void * Emalloc(int Size)
Definition: emalloc.cpp:47
FEATURE Features[1]
Definition: ocrfeatures.h:72

◆ SetUpForFloat2Int()

CLASS_STRUCT* SetUpForFloat2Int ( const UNICHARSET & unicharset,
LIST  LabeledClassList 
)

Definition at line 744 of file commontraining.cpp.

745  {
746  MERGE_CLASS MergeClass;
747  CLASS_TYPE Class;
748  int NumProtos;
749  int NumConfigs;
750  int NumWords;
751  int i, j;
752  float Values[3];
753  PROTO NewProto;
754  PROTO OldProto;
755  BIT_VECTOR NewConfig;
756  BIT_VECTOR OldConfig;
757 
758  // printf("Float2Int ...\n");
759 
760  CLASS_STRUCT* float_classes = new CLASS_STRUCT[unicharset.size()];
761  iterate(LabeledClassList)
762  {
763  UnicityTableEqEq<int> font_set;
764  MergeClass = (MERGE_CLASS) first_node (LabeledClassList);
765  Class = &float_classes[unicharset.unichar_to_id(MergeClass->Label)];
766  NumProtos = MergeClass->Class->NumProtos;
767  NumConfigs = MergeClass->Class->NumConfigs;
768  font_set.move(&MergeClass->Class->font_set);
769  Class->NumProtos = NumProtos;
770  Class->MaxNumProtos = NumProtos;
771  Class->Prototypes = (PROTO) Emalloc (sizeof(PROTO_STRUCT) * NumProtos);
772  for(i=0; i < NumProtos; i++)
773  {
774  NewProto = ProtoIn(Class, i);
775  OldProto = ProtoIn(MergeClass->Class, i);
776  Values[0] = OldProto->X;
777  Values[1] = OldProto->Y;
778  Values[2] = OldProto->Angle;
779  Normalize(Values);
780  NewProto->X = OldProto->X;
781  NewProto->Y = OldProto->Y;
782  NewProto->Length = OldProto->Length;
783  NewProto->Angle = OldProto->Angle;
784  NewProto->A = Values[0];
785  NewProto->B = Values[1];
786  NewProto->C = Values[2];
787  }
788 
789  Class->NumConfigs = NumConfigs;
790  Class->MaxNumConfigs = NumConfigs;
791  Class->font_set.move(&font_set);
792  Class->Configurations = (BIT_VECTOR*) Emalloc (sizeof(BIT_VECTOR) * NumConfigs);
793  NumWords = WordsInVectorOfSize(NumProtos);
794  for(i=0; i < NumConfigs; i++)
795  {
796  NewConfig = NewBitVector(NumProtos);
797  OldConfig = MergeClass->Class->Configurations[i];
798  for(j=0; j < NumWords; j++)
799  NewConfig[j] = OldConfig[j];
800  Class->Configurations[i] = NewConfig;
801  }
802  }
803  return float_classes;
804 } // SetUpForFloat2Int
#define first_node(l)
Definition: oldlist.h:139
PROTO_STRUCT * PROTO
Definition: protos.h:52
CONFIGS Configurations
Definition: protos.h:64
uinT32 * BIT_VECTOR
Definition: bitvec.h:28
inT16 NumProtos
Definition: protos.h:59
int size() const
Definition: unicharset.h:297
MERGE_CLASS_NODE * MERGE_CLASS
UnicityTableEqEq< int > font_set
Definition: protos.h:65
PROTO Prototypes
Definition: protos.h:61
FLOAT32 Angle
Definition: protos.h:49
void Normalize(float *Values)
BIT_VECTOR NewBitVector(int NumBits)
Definition: bitvec.cpp:89
FLOAT32 X
Definition: protos.h:47
#define WordsInVectorOfSize(NumBits)
Definition: bitvec.h:63
FLOAT32 Length
Definition: protos.h:50
inT16 NumConfigs
Definition: protos.h:62
FLOAT32 C
Definition: protos.h:46
inT16 MaxNumProtos
Definition: protos.h:60
FLOAT32 A
Definition: protos.h:44
inT16 MaxNumConfigs
Definition: protos.h:63
UNICHAR_ID TESS_API unichar_to_id(const char *const unichar_repr) const
Definition: unicharset.cpp:194
FLOAT32 Y
Definition: protos.h:48
#define iterate(l)
Definition: oldlist.h:159
#define ProtoIn(Class, Pid)
Definition: protos.h:123
void * Emalloc(int Size)
Definition: emalloc.cpp:47
CLASS_TYPE Class
void move(UnicityTable< T > *from)
FLOAT32 B
Definition: protos.h:45

◆ WriteTrainingSamples()

void WriteTrainingSamples ( const FEATURE_DEFS_STRUCT & FeatureDefs,
char *  Directory,
LIST  CharList,
const char *  program_feature_type 
)

Variable Documentation

◆ Config

CLUSTERCONFIG Config

Definition at line 50 of file commontraining.cpp.

◆ feature_defs

FEATURE_DEFS_STRUCT feature_defs

Definition at line 51 of file commontraining.cpp.