44 STRING output_fname = fname;
45 const char *lastdot = strrchr(output_fname.
string(),
'.');
46 if (lastdot != NULL) output_fname[lastdot - output_fname.
string()] =
'\0';
47 output_fname +=
".txt";
54 while (page_res_it->
block() != NULL && page_res_it->
word() == NULL)
57 if (page_res_it->
word() != NULL) {
64 if (tbox->
left() < 0) {
84 const char *lastdot = strrchr(box_fname.
string(),
'.');
85 if (lastdot != NULL) box_fname[lastdot - box_fname.
string()] =
'\0';
92 page_res_it.restart_page();
100 int examined_words = 0;
102 keep_going =
read_t(&page_res_it, &tbox);
109 page_res_it.forward();
110 keep_going =
read_t(&page_res_it, &tbox);
119 page_res_it.forward();
120 keep_going =
read_t(&page_res_it, &tbox);
133 page_res_it.forward();
134 }
while (keep_going);
141 for (page_res_it.restart_page(); page_res_it.block() != NULL;
142 page_res_it.forward()) {
143 if (page_res_it.word()) {
144 if (page_res_it.word()->uch_set == NULL)
149 if (examined_words < 0.85 * total_words) {
150 tprintf(
"TODO(antonova): clean up recog_training_segmented; " 151 " It examined only a small fraction of the ambigs image.\n");
153 tprintf(
"recog_training_segmented: examined %d / %d words.\n",
154 examined_words, total_words);
158 static void PrintPath(
int length,
const BLOB_CHOICE** blob_choices,
160 const char *label, FILE *output_file) {
162 float certainty = 0.0f;
163 for (
int i = 0; i < length; ++i) {
165 fprintf(output_file,
"%s",
167 rating += blob_choice->
rating();
168 if (certainty > blob_choice->
certainty())
171 fprintf(output_file,
"\t%s\t%.4f\t%.4f\n",
172 label, rating, certainty);
177 static void PrintMatrixPaths(
int col,
int dim,
181 const char *label, FILE *output_file) {
182 for (
int row = col; row < dim && row - col < ratings.
bandwidth(); ++row) {
184 BLOB_CHOICE_IT bc_it(ratings.
get(col, row));
185 for (bc_it.mark_cycle_pt(); !bc_it.cycled_list(); bc_it.forward()) {
186 blob_choices[length] = bc_it.data();
188 PrintMatrixPaths(row + 1, dim, ratings, length + 1, blob_choices,
189 unicharset, label, output_file);
191 PrintPath(length + 1, blob_choices, unicharset, label, output_file);
217 tprintf(
"Not outputting illegal unichar %s\n", label);
224 PrintMatrixPaths(0, dim, *werd_res->
ratings, 0, blob_choices,
226 delete [] blob_choices;
void rotate(const FCOORD &vec)
bool ReadNextBox(int *line_number, FILE *box_file, STRING *utf8_str, TBOX *bounding_box)
FILE * open_file(const char *filename, const char *mode)
bool read_t(PAGE_RES_IT *page_res_it, TBOX *tbox)
WERD_CHOICE * best_choice
bool tessedit_enable_doc_dict
UNICHAR_ID unichar_id() const
bool encode_string(const char *str, bool give_up_on_failure, GenericVector< UNICHAR_ID > *encoding, GenericVector< char > *lengths, int *encoded_length) const
const char * string() const
bool tessedit_ambigs_training
bool stopper_no_acceptable_choices
BLOCK_RES * block() const
void SetupWordPassN(int pass_n, WordData *word)
const inT16 kMaxBoxEdgeDiff
FILE * init_recog_training(const STRING &fname)
void classify_word_and_language(int pass_n, PAGE_RES_IT *pr_it, WordData *word_data)
int tessedit_tess_adaption_mode
void ambigs_classify_and_output(const char *label, PAGE_RES_IT *pr_it, FILE *output_file)
const char * id_to_unichar(UNICHAR_ID id) const
TBOX bounding_box() const
void recog_training_segmented(const STRING &fname, PAGE_RES *page_res, volatile ETEXT_DESC *monitor, FILE *output_file)