tesseract
3.05.02
|
#include <tessdatamanager.h>
Public Member Functions | |
TessdataManager () | |
~TessdataManager () | |
int | DebugLevel () |
bool | Init (const char *data_file_name, int debug_level) |
const STRING & | GetDataFileName () const |
FILE * | GetDataFilePtr () const |
bool | SeekToStart (TessdataType tessdata_type) |
inT64 | GetEndOffset (TessdataType tessdata_type) const |
void | End () |
bool | swap () const |
bool | OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components) |
bool | ExtractToFile (const char *filename) |
Static Public Member Functions | |
static bool | WriteMetadata (inT64 *offset_table, const char *language_data_path_prefix, FILE *output_file) |
static bool | CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) |
static void | CopyFile (FILE *input_file, FILE *output_file, bool newline_end, inT64 num_bytes_to_copy) |
static bool | TessdataTypeFromFileSuffix (const char *suffix, TessdataType *type, bool *text_file) |
static bool | TessdataTypeFromFileName (const char *filename, TessdataType *type, bool *text_file) |
Definition at line 133 of file tessdatamanager.h.
tesseract::TessdataManager::TessdataManager () [inline]
Definition at line 135 of file tessdatamanager.h.
tesseract::TessdataManager::~TessdataManager () [inline]
Definition at line 142 of file tessdatamanager.h.
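bool tesseract::TessdataManager::CombineDataFiles (const char *language_data_path_prefix, const char *output_filename) [static]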
Reads all the standard tesseract config and data files for a language at the given path and bundles them up into one binary data file. Returns true if the combined traineddata file was successfully written.
Definition at line 127 of file tessdatamanager.cpp.
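void tesseract::TessdataManager::CopyFile (FILE *input_file, FILE *output_file, bool newline_end, inT64 num_bytes_to_copy) [static]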
Copies data from the given input file to the output_file provided. If num_bytes_to_copy is >= 0, only num_bytes_to_copy bytes are copied from the input file; otherwise all the data in the input file is copied.
Definition at line 74 of file tessdatamanager.cpp.
int tesseract::TessdataManager::DebugLevel () [inline]
Definition at line 143 of file tessdatamanager.h.
void tesseract::TessdataManager::End () [inline]
Closes data_file_ (if it was opened by Init()).
Definition at line 192 of file tessdatamanager.h.
bool tesseract::TessdataManager::ExtractToFile (const char *filename)
Extracts tessdata component implied by the name of the input file from the combined traineddata loaded into TessdataManager. Writes the extracted component to the file indicated by the file name. E.g. if the filename given is somepath/somelang.unicharset, unicharset will be extracted from the data loaded into the TessdataManager and will be written to somepath/somelang.unicharset.
Definition at line 259 of file tessdatamanager.cpp.
const STRING & tesseract::TessdataManager::GetDataFileName () const [inline]
Definition at line 152 of file tessdatamanager.h.
FILE * tesseract::TessdataManager::GetDataFilePtr () const [inline]
Returns data file pointer.
Definition at line 155 of file tessdatamanager.h.
inT64 tesseract::TessdataManager::GetEndOffset (TessdataType tessdata_type) const [inline]
Returns the end offset for the given tesseract data file type.
Definition at line 178 of file tessdatamanager.h.
bool tesseract::TessdataManager::Init (const char *data_file_name, int debug_level)
Opens the given data file and reads the offset table.
Definition at line 36 of file tessdatamanager.cpp.
bool tesseract::TessdataManager::OverwriteComponents (const char *new_traineddata_filename, char **component_filenames, int num_new_components)
Gets the individual components from the data_file_ with which the class was initialized. Overwrites the components specified by component_filenames. Writes the updated traineddata file to new_traineddata_filename.
Definition at line 183 of file tessdatamanager.cpp.
bool tesseract::TessdataManager::SeekToStart (TessdataType tessdata_type) [inline]
Returns false if there is no data of the given type. Otherwise does a seek on the data_file_ to position the pointer at the start of the data of the given type.
Definition at line 162 of file tessdatamanager.h.
bool tesseract::TessdataManager::swap () const [inline]
Definition at line 198 of file tessdatamanager.h.
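bool tesseract::TessdataManager::TessdataTypeFromFileName (const char *filename, TessdataType *type, bool *text_file) [static]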
Tries to determine tessdata component file suffix from filename, returns true on success.
Definition at line 251 of file tessdatamanager.cpp.
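bool tesseract::TessdataManager::TessdataTypeFromFileSuffix (const char *suffix, TessdataType *type, bool *text_file) [static]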
Fills type with TessdataType of the tessdata component represented by the given file name. E.g. tessdata/eng.unicharset -> TESSDATA_UNICHARSET. Sets *text_file to true if the component is in text format (e.g. unicharset, unichar ambigs, config, etc).
Definition at line 237 of file tessdatamanager.cpp.
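bool tesseract::TessdataManager::WriteMetadata (inT64 *offset_table, const char *language_data_path_prefix, FILE *output_file) [static]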
Writes the number of entries and the given offset table to output_file. Returns false on error.
Definition at line 101 of file tessdatamanager.cpp.