tesseract  3.05.02
commontraining.h
Go to the documentation of this file.
1 // Copyright 2008 Google Inc. All Rights Reserved.
2 // Author: scharron@google.com (Samuel Charron)
3 //
4 // Licensed under the Apache License, Version 2.0 (the "License");
5 // you may not use this file except in compliance with the License.
6 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
8 // Unless required by applicable law or agreed to in writing, software
9 // distributed under the License is distributed on an "AS IS" BASIS,
10 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11 // See the License for the specific language governing permissions and
12 // limitations under the License.
13 
14 #ifndef TESSERACT_TRAINING_COMMONTRAINING_H__
15 #define TESSERACT_TRAINING_COMMONTRAINING_H__
16 
17 #include "cluster.h"
18 #include "commandlineflags.h"
19 #include "featdefs.h"
20 #include "intproto.h"
21 #include "oldlist.h"
22 
// Forward declarations of tesseract classes. MasterTrainer and ShapeTable are
// only used through pointers/references in the declarations further down, so
// their full definitions are not needed here. Classify is not referenced by
// any declaration visible in this listing.
23 namespace tesseract {
24 class Classify;
25 class MasterTrainer;
26 class ShapeTable;
27 }
28 
30 // Globals ///////////////////////////////////////////////////////////////////
32 
34 
// Global clustering configuration. This is a declaration only (extern), so
// each program using this header has to provide the one definition itself.
35 // Must be defined in the file that "implements" commonTraining facilities.
36 extern CLUSTERCONFIG Config;
37 
39 // Structs ///////////////////////////////////////////////////////////////////
// Anonymous struct carrying a character-string label.
// NOTE(review): listing lines 44-46 (further members) and 48 (the typedef
// names) are missing from this extract, so the definition below is incomplete.
// The index at the end of the file mentions `struct LABELEDLISTNODE *
// LABELEDLIST`, so the typedef names are presumably LABELEDLISTNODE and
// LABELEDLIST — restore the missing lines from the upstream header.
41 typedef struct
42 {
43  char *Label;
47 }
49 
50 typedef struct
51 {
52  char* Label;
53  int NumMerged[MAX_NUM_PROTOS];
57 
58 
60 // Functions /////////////////////////////////////////////////////////////////
// Command-line argument handling for the training tools.
// Both argc and argv are passed by pointer, so the callee is able to modify
// the caller's argument count and vector (presumably to strip the flags it
// consumes — confirm in the implementation).
62 void ParseArguments(int* argc, char*** argv);
63 
64 namespace tesseract {
65 
66 // Helper loads shape table from the given file.
67 ShapeTable* LoadShapeTable(const STRING& file_prefix);
68 // Helper to write the shape_table.
69 void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
70 
71 // Creates a MasterTrainer and loads the training data into it:
72 // Initializes feature_defs and IntegerFX.
73 // Loads the shape_table if shape_table != NULL.
74 // Loads initial unicharset from -U command-line option.
75 // If FLAGS_input_trainer is set, loads the majority of data from there, else:
76 // Loads font info from -F option.
77 // Loads xheights from -X option.
78 // Loads samples from .tr files in remaining command-line args.
79 // Deletes outliers and computes canonical samples.
80 // If FLAGS_output_trainer is set, saves the trainer for future use.
81 // Computes canonical and cloud features.
82 // If shape_table is not NULL, but failed to load, make a fake flat one,
83 // as shape clustering was not run.
// shape_table is an in/out pointer-to-pointer: pass NULL to skip shape table
// handling altogether. The roles of replication and file_prefix are not
// described in this header; see the implementation.
84 MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
85  bool replication,
86  ShapeTable** shape_table,
87  STRING* file_prefix);
88 } // namespace tesseract.
89 
// Returns a filename taken from the (read-only) argument vector.
// NOTE(review): the name suggests successive calls walk through the remaining
// arguments and that some sentinel marks the end — the iteration state and the
// end-of-list return value are not visible here; confirm in the implementation.
90 const char *GetNextFilename(int argc, const char* const * argv);
91 
// Takes a LIST and a label and yields a LABELEDLIST (presumably the entry of
// List whose label matches Label — confirm in the implementation).
// NOTE(review): listing line 92 was missing from this extract; it is restored
// from the signature index at the end of the file.
92 LABELEDLIST FindList(
93  LIST List,
94  char *Label);
95 
// Produces a LABELEDLIST for the given label (presumably a newly allocated,
// empty one that the caller releases with FreeLabeledList — confirm).
// NOTE(review): listing line 96 was missing from this extract; it is restored
// from the signature index at the end of the file.
96 LABELEDLIST NewLabeledList(
97  const char *Label);
98 
// Reads training samples from an open FILE into *training_samples.
// feature_name, max_samples and unicharset are inputs whose exact semantics
// are not described in this header; see the implementation.
// NOTE(review): listing line 99 was missing from this extract; it is restored
// from the signature index at the end of the file.
99 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
100  const char *feature_name, int max_samples,
101  UNICHARSET* unicharset,
102  FILE* file, LIST* training_samples);
103 
// Writes the samples held in CharList (presumably as files under Directory,
// restricted to the feature type named by program_feature_type — confirm).
// NOTE(review): listing line 104 was missing from this extract; it is restored
// from the signature index at the end of the file.
104 void WriteTrainingSamples(
105  const FEATURE_DEFS_STRUCT &FeatureDefs,
106  char *Directory,
107  LIST CharList,
108  const char *program_feature_type);
109 
// Releases the training samples held in CharList (ownership details are not
// visible in this header; confirm in the implementation).
// NOTE(review): listing line 110 was missing from this extract; it is restored
// from the signature index at the end of the file.
110 void FreeTrainingSamples(
111  LIST CharList);
112 
// Releases a LABELEDLIST. What exactly is freed (label, contained list,
// the node itself) is not visible here; see the implementation.
113 void FreeLabeledList(
114  LABELEDLIST LabeledList);
115 
// Releases the list passed as ClassListList (presumably a list of
// MERGE_CLASS entries — confirm in the implementation).
// NOTE(review): listing line 116 was missing from this extract; it is restored
// from the signature index at the end of the file.
116 void FreeLabeledClassList(
117  LIST ClassListList);
118 
// Builds a CLUSTERER from the samples of one labeled character, for the
// feature type named by program_feature_type. Ownership of the returned
// pointer is not stated in this header; confirm in the implementation.
// NOTE(review): listing line 119 was missing from this extract; it is restored
// from the signature index at the end of the file.
119 CLUSTERER *SetUpForClustering(
120  const FEATURE_DEFS_STRUCT &FeatureDefs,
121  LABELEDLIST CharSample,
122  const char *program_feature_type);
123 
// Filters ProtoList and returns a LIST. The two BOOL8 flags select whether
// significant and/or insignificant prototypes are kept; the meaning of N is
// not described in this header (see the implementation).
// NOTE(review): listing line 124 was missing from this extract; it is restored
// from the signature index at the end of the file.
124 LIST RemoveInsignificantProtos(
125  LIST ProtoList,
126  BOOL8 KeepSigProtos,
127  BOOL8 KeepInsigProtos,
128  int N);
129 
// Cleans up data attached to the prototypes in ProtoList. Which fields count
// as "unused" is not visible in this header; see the implementation.
130 void CleanUpUnusedData(
131  LIST ProtoList);
132 
// Merges insignificant prototypes in ProtoList for the class named by label,
// using the given clusterer and clustering configuration. The merge criteria
// are not described in this header; see the implementation.
// NOTE(review): listing lines 133 and 137 were missing from this extract; they
// are restored from the signature index at the end of the file.
133 void MergeInsignificantProtos(
134  LIST ProtoList,
135  const char *label,
136  CLUSTERER *Clusterer,
137  CLUSTERCONFIG *Config);
138 
// Takes a LIST and a label and yields a MERGE_CLASS (presumably the entry of
// List whose label matches Label — confirm in the implementation).
// NOTE(review): listing line 139 was missing from this extract; it is restored
// from the signature index at the end of the file.
139 MERGE_CLASS FindClass(
140  LIST List,
141  const char *Label);
142 
// Produces a MERGE_CLASS for the given label (presumably newly allocated;
// ownership is not stated in this header — confirm in the implementation).
// NOTE(review): listing line 143 was missing from this extract; it is restored
// from the signature index at the end of the file.
143 MERGE_CLASS NewLabeledClass(
144  const char *Label);
145 
// NOTE(review): listing line 146 (return type and function name) is missing
// from this extract, so only the tail of a declaration taking a LIST named
// CharList remains. The name cannot be determined unambiguously from this
// listing; restore the first line from the upstream header.
147  LIST CharList);
148 
// Produces CLASS_STRUCT data from a list of labeled classes, using unicharset
// read-only. NOTE(review): whether the result is a single object or an array,
// and who frees it, is not visible here; confirm in the implementation.
149 CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
150  LIST LabeledClassList);
151 
// Normalizes the floats pointed to by Values in place (the pointer is
// non-const and nothing is returned). NOTE(review): the number of elements and
// the norm used are not visible in this header; confirm in the implementation.
152 void Normalize(
153  float *Values);
154 
// Releases the list passed as CharList (presumably one built with
// AddToNormProtosList — confirm in the implementation).
155 void FreeNormProtoList(
156  LIST CharList);
157 
// Adds an entry for CharName with ProtoList to *NormProtoList. The list is
// passed by pointer, so the caller's list head can be updated.
// NOTE(review): listing line 158 was missing from this extract; it is restored
// from the signature index at the end of the file.
158 void AddToNormProtosList(
159  LIST* NormProtoList,
160  LIST ProtoList,
161  char *CharName);
162 
// Returns a count of the prototypes in ProtoList. The two BOOL8 flags select
// whether significant and/or insignificant prototypes are included in the
// count.
163 int NumberOfProtos(
164  LIST ProtoList,
165  BOOL8 CountSigProtos,
166  BOOL8 CountInsigProtos);
167 
168 
// Takes no arguments and returns nothing, so it can only act on global state.
// NOTE(review): presumably allocates the storage used for normalization
// prototypes — which globals it touches is not visible in this header.
169 void allocNormProtos();
170 #endif // TESSERACT_TRAINING_COMMONTRAINING_H__
LABELEDLIST NewLabeledList(const char *Label)
void FreeTrainingSamples(LIST CharList)
FEATURE_DEFS_STRUCT feature_defs
void MergeInsignificantProtos(LIST ProtoList, const char *label, CLUSTERER *Clusterer, CLUSTERCONFIG *Config)
void FreeNormProtoList(LIST CharList)
int NumberOfProtos(LIST ProtoList, BOOL8 CountSigProtos, BOOL8 CountInsigProtos)
void Normalize(float *Values)
MERGE_CLASS_NODE * MERGE_CLASS
LIST RemoveInsignificantProtos(LIST ProtoList, BOOL8 KeepSigProtos, BOOL8 KeepInsigProtos, int N)
MERGE_CLASS FindClass(LIST List, const char *Label)
void allocNormProtos()
void ParseArguments(int *argc, char ***argv)
unsigned char BOOL8
Definition: host.h:46
ShapeTable * LoadShapeTable(const STRING &file_prefix)
#define MAX_NUM_PROTOS
Definition: intproto.h:47
void CleanUpUnusedData(LIST ProtoList)
void FreeLabeledList(LABELEDLIST LabeledList)
MasterTrainer * LoadTrainingData(int argc, const char *const *argv, bool replication, ShapeTable **shape_table, STRING *file_prefix)
void WriteShapeTable(const STRING &file_prefix, const ShapeTable &shape_table)
CLASS_STRUCT * SetUpForFloat2Int(const UNICHARSET &unicharset, LIST LabeledClassList)
void AddToNormProtosList(LIST *NormProtoList, LIST ProtoList, char *CharName)
Definition: strngs.h:44
void ReadTrainingSamples(const FEATURE_DEFS_STRUCT &feature_defs, const char *feature_name, int max_samples, UNICHARSET *unicharset, FILE *file, LIST *training_samples)
const char * GetNextFilename(int argc, const char *const *argv)
CLUSTERCONFIG Config
MERGE_CLASS NewLabeledClass(const char *Label)
void FreeLabeledClassList(LIST ClassListList)
struct LABELEDLISTNODE * LABELEDLIST
void WriteTrainingSamples(const FEATURE_DEFS_STRUCT &FeatureDefs, char *Directory, LIST CharList, const char *program_feature_type)
CLASS_TYPE Class
CLUSTERER * SetUpForClustering(const FEATURE_DEFS_STRUCT &FeatureDefs, LABELEDLIST CharSample, const char *program_feature_type)
LABELEDLIST FindList(LIST List, char *Label)