tesseract  3.05.02
tesseractmain.cpp
Go to the documentation of this file.
1 /**********************************************************************
2 * File: tesseractmain.cpp (Formerly tessedit.c)
3 * Description: Main program for merge of tess and editor.
4 * Author: Ray Smith
5 * Created: Tue Jan 07 15:21:46 GMT 1992
6 *
7 * (C) Copyright 1992, Hewlett-Packard Ltd.
8 ** Licensed under the Apache License, Version 2.0 (the "License");
9 ** you may not use this file except in compliance with the License.
10 ** You may obtain a copy of the License at
11 ** http://www.apache.org/licenses/LICENSE-2.0
12 ** Unless required by applicable law or agreed to in writing, software
13 ** distributed under the License is distributed on an "AS IS" BASIS,
14 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 ** See the License for the specific language governing permissions and
16 ** limitations under the License.
17 *
18 **********************************************************************/
19 
20 // Include automatically generated configuration file if running autoconf
21 #ifdef HAVE_CONFIG_H
22 #include "config_auto.h"
23 #endif
24 
25 #include <iostream>
26 
27 #include "allheaders.h"
28 #include "baseapi.h"
29 #include "basedir.h"
30 #include "dict.h"
31 #include "openclwrapper.h"
32 #include "osdetect.h"
33 #include "renderer.h"
34 #include "strngs.h"
35 #include "tprintf.h"
36 
37 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
38 
39 #include <tiffio.h>
40 
/**
 * libtiff warning callback that reports warnings on stderr.
 *
 * Output has the form "<module>: Warning, <formatted message>.\n"; the
 * "<module>: " prefix is omitted when no module name is supplied.
 *
 * @param module  Name of the reporting module, or NULL.
 * @param fmt     printf-style format string of the warning.
 * @param ap      Arguments consumed by fmt.
 */
static void Win32WarningHandler(const char* module, const char* fmt,
                                va_list ap) {
  if (module != NULL) {
    fprintf(stderr, "%s: ", module);
  }
  fprintf(stderr, "Warning, ");
  vfprintf(stderr, fmt, ap);
  fprintf(stderr, ".\n");
}
50 
51 #endif /* HAVE_TIFFIO_H && _WIN32 */
52 
54  char* versionStrP;
55 
56  printf("tesseract %s\n", tesseract::TessBaseAPI::Version());
57 
58  versionStrP = getLeptonicaVersion();
59  printf(" %s\n", versionStrP);
60  lept_free(versionStrP);
61 
62  versionStrP = getImagelibVersions();
63  printf(" %s\n", versionStrP);
64  lept_free(versionStrP);
65 
66 #ifdef USE_OPENCL
67  cl_platform_id platform[4];
68  cl_uint num_platforms;
69 
70  printf(" OpenCL info:\n");
71  if (clGetPlatformIDs(4, platform, &num_platforms) == CL_SUCCESS) {
72  printf(" Found %u platform(s).\n", num_platforms);
73  for (unsigned n = 0; n < num_platforms; n++) {
74  char info[256];
75  if (clGetPlatformInfo(platform[n], CL_PLATFORM_NAME, 256, info, 0) ==
76  CL_SUCCESS) {
77  printf(" Platform %u name: %s.\n", n + 1, info);
78  }
79  if (clGetPlatformInfo(platform[n], CL_PLATFORM_VERSION, 256, info, 0) ==
80  CL_SUCCESS) {
81  printf(" Version: %s.\n", info);
82  }
83  cl_device_id devices[2];
84  cl_uint num_devices;
85  if (clGetDeviceIDs(platform[n], CL_DEVICE_TYPE_ALL, 2, devices,
86  &num_devices) == CL_SUCCESS) {
87  printf(" Found %u device(s).\n", num_devices);
88  for (unsigned i = 0; i < num_devices; ++i) {
89  if (clGetDeviceInfo(devices[i], CL_DEVICE_NAME, 256, info, 0) ==
90  CL_SUCCESS) {
91  printf(" Device %u name: %s.\n", i + 1, info);
92  }
93  }
94  }
95  }
96  }
97 #endif
98 }
99 
/**
 * Prints the four supported command-line forms to stdout.
 *
 * @param program  Name shown as the command in each usage line.
 */
void PrintUsage(const char* program) {
  printf(
      "Usage:\n"
      " %s --help | --help-psm | --help-oem | --version\n"
      " %s --list-langs [--tessdata-dir PATH]\n"
      " %s --print-parameters [options...] [configfile...]\n"
      " %s imagename|stdin outputbase|stdout [options...] [configfile...]\n",
      program, program, program, program);
}
109 
/**
 * Prints the list of page segmentation modes (0-13) to stdout.
 */
void PrintHelpForPSM() {
  const char* msg =
      "Page segmentation modes:\n"
      " 0 Orientation and script detection (OSD) only.\n"
      " 1 Automatic page segmentation with OSD.\n"
      " 2 Automatic page segmentation, but no OSD, or OCR.\n"
      " 3 Fully automatic page segmentation, but no OSD. (Default)\n"
      " 4 Assume a single column of text of variable sizes.\n"
      " 5 Assume a single uniform block of vertically aligned text.\n"
      " 6 Assume a single uniform block of text.\n"
      " 7 Treat the image as a single text line.\n"
      " 8 Treat the image as a single word.\n"
      " 9 Treat the image as a single word in a circle.\n"
      " 10 Treat the image as a single character.\n"
      " 11 Sparse text. Find as much text as possible in no"
      " particular order.\n"
      " 12 Sparse text with OSD.\n"
      " 13 Raw line. Treat the image as a single text line,\n"
      "\t\t\tbypassing hacks that are Tesseract-specific.\n";

  printf("%s", msg);
}
132 
/**
 * Prints the list of OCR engine modes (0-3) to stdout.
 */
void PrintHelpForOEM() {
  const char* msg =
      "OCR Engine modes:\n"
      " 0 Original Tesseract only.\n"
      " 1 Cube only.\n"
      " 2 Tesseract + cube.\n"
      " 3 Default, based on what is available.\n";

  printf("%s", msg);
}
143 
144 void PrintHelpMessage(const char* program) {
145  PrintUsage(program);
146 
147  const char* ocr_options =
148  "OCR options:\n"
149  " --tessdata-dir PATH Specify the location of tessdata path.\n"
150  " --user-words PATH Specify the location of user words file.\n"
151  " --user-patterns PATH Specify the location of user patterns file.\n"
152  " -l LANG[+LANG] Specify language(s) used for OCR.\n"
153  " -c VAR=VALUE Set value for config variables.\n"
154  " Multiple -c arguments are allowed.\n"
155  " --psm NUM Specify page segmentation mode.\n"
156  " --oem NUM Specify OCR Engine mode.\n"
157  "NOTE: These options must occur before any configfile.\n";
158 
159  printf("\n%s\n", ocr_options);
160  PrintHelpForPSM();
161  PrintHelpForOEM();
162 
163  const char* single_options =
164  "Single options:\n"
165  " -h, --help Show this help message.\n"
166  " --help-psm Show page segmentation modes.\n"
167  " --help-oem Show OCR Engine modes.\n"
168  " -v, --version Show version information.\n"
169  " --list-langs List available languages for tesseract engine.\n"
170  " --print-parameters Print tesseract parameters to stdout.\n";
171 
172  printf("\n%s", single_options);
173 }
174 
176  char** argv) {
177  char opt1[256], opt2[255];
178  for (int i = 0; i < argc; i++) {
179  if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
180  strncpy(opt1, argv[i + 1], 255);
181  opt1[255] = '\0';
182  char* p = strchr(opt1, '=');
183  if (!p) {
184  fprintf(stderr, "Missing = in configvar assignment\n");
185  exit(1);
186  }
187  *p = 0;
188  strncpy(opt2, strchr(argv[i + 1], '=') + 1, 255);
189  opt2[254] = 0;
190  ++i;
191 
192  if (!api->SetVariable(opt1, opt2)) {
193  fprintf(stderr, "Could not set option: %s=%s\n", opt1, opt2);
194  }
195  }
196  }
197 }
198 
200  GenericVector<STRING> languages;
201  api->GetAvailableLanguagesAsVector(&languages);
202  printf("List of available languages (%d):\n", languages.size());
203  for (int index = 0; index < languages.size(); ++index) {
204  STRING& string = languages[index];
205  printf("%s\n", string.string());
206  }
207  api->End();
208 }
209 
210 void PrintBanner() {
211  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
213 }
214 
230  tesseract::PageSegMode pagesegmode) {
231  if (api->GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK)
232  api->SetPageSegMode(pagesegmode);
233 }
234 
235 // NOTE: arg_i is used here to avoid ugly *i so many times in this function
236 void ParseArgs(const int argc, char** argv, const char** lang,
237  const char** image, const char** outputbase,
238  const char** datapath, bool* list_langs, bool* print_parameters,
239  GenericVector<STRING>* vars_vec,
240  GenericVector<STRING>* vars_values, int* arg_i,
241  tesseract::PageSegMode* pagesegmode,
242  tesseract::OcrEngineMode* enginemode) {
243  if (argc == 1) {
244  PrintHelpMessage(argv[0]);
245  exit(0);
246  }
247 
248  if (argc == 2) {
249  if ((strcmp(argv[1], "-h") == 0) || (strcmp(argv[1], "--help") == 0)) {
250  PrintHelpMessage(argv[0]);
251  exit(0);
252  }
253  if ((strcmp(argv[1], "--help-psm") == 0)) {
254  PrintHelpForPSM();
255  exit(0);
256  }
257  if ((strcmp(argv[1], "--help-oem") == 0)) {
258  PrintHelpForOEM();
259  exit(0);
260  }
261  if ((strcmp(argv[1], "-v") == 0) || (strcmp(argv[1], "--version") == 0)) {
263  exit(0);
264  }
265  }
266 
267  bool noocr = false;
268  int i = 1;
269  while (i < argc && (*outputbase == NULL || argv[i][0] == '-')) {
270  if (strcmp(argv[i], "-l") == 0 && i + 1 < argc) {
271  *lang = argv[i + 1];
272  ++i;
273  } else if (strcmp(argv[i], "--tessdata-dir") == 0 && i + 1 < argc) {
274  *datapath = argv[i + 1];
275  ++i;
276  } else if (strcmp(argv[i], "--user-words") == 0 && i + 1 < argc) {
277  vars_vec->push_back("user_words_file");
278  vars_values->push_back(argv[i + 1]);
279  ++i;
280  } else if (strcmp(argv[i], "--user-patterns") == 0 && i + 1 < argc) {
281  vars_vec->push_back("user_patterns_file");
282  vars_values->push_back(argv[i + 1]);
283  ++i;
284  } else if (strcmp(argv[i], "--list-langs") == 0) {
285  noocr = true;
286  *list_langs = true;
287  } else if (strcmp(argv[i], "-psm") == 0 && i + 1 < argc) {
288  // The parameter -psm is deprecated and was replaced by --psm.
289  // It is still supported for compatibility reasons.
290  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
291  ++i;
292  } else if (strcmp(argv[i], "--psm") == 0 && i + 1 < argc) {
293  *pagesegmode = static_cast<tesseract::PageSegMode>(atoi(argv[i + 1]));
294  ++i;
295  } else if (strcmp(argv[i], "--oem") == 0 && i + 1 < argc) {
296  *enginemode = static_cast<tesseract::OcrEngineMode>(atoi(argv[i + 1]));
297  ++i;
298  } else if (strcmp(argv[i], "--print-parameters") == 0) {
299  noocr = true;
300  *print_parameters = true;
301  } else if (strcmp(argv[i], "-c") == 0 && i + 1 < argc) {
302  // handled properly after api init
303  ++i;
304  } else if (*image == NULL) {
305  *image = argv[i];
306  } else if (*outputbase == NULL) {
307  *outputbase = argv[i];
308  }
309  ++i;
310  }
311 
312  *arg_i = i;
313 
314  if (argc == 2 && strcmp(argv[1], "--list-langs") == 0) {
315  *list_langs = true;
316  noocr = true;
317  }
318 
319  if (*outputbase == NULL && noocr == false) {
320  PrintHelpMessage(argv[0]);
321  exit(1);
322  }
323 }
324 
328  tesseract::PageSegMode pagesegmode, const char* outputbase) {
329  if (pagesegmode == tesseract::PSM_OSD_ONLY) {
330  renderers->push_back(new tesseract::TessOsdRenderer(outputbase));
331  } else {
332  bool b;
333  api->GetBoolVariable("tessedit_create_hocr", &b);
334  if (b) {
335  bool font_info;
336  api->GetBoolVariable("hocr_font_info", &font_info);
337  renderers->push_back(
338  new tesseract::TessHOcrRenderer(outputbase, font_info));
339  }
340 
341  api->GetBoolVariable("tessedit_create_tsv", &b);
342  if (b) {
343  bool font_info;
344  api->GetBoolVariable("hocr_font_info", &font_info);
345  renderers->push_back(
346  new tesseract::TessTsvRenderer(outputbase, font_info));
347  }
348 
349  api->GetBoolVariable("tessedit_create_pdf", &b);
350  if (b) {
351  bool textonly;
352  api->GetBoolVariable("textonly_pdf", &textonly);
353  renderers->push_back(new tesseract::TessPDFRenderer(
354  outputbase, api->GetDatapath(), textonly));
355  }
356 
357  api->GetBoolVariable("tessedit_write_unlv", &b);
358  if (b) {
359  renderers->push_back(new tesseract::TessUnlvRenderer(outputbase));
360  }
361 
362  api->GetBoolVariable("tessedit_create_boxfile", &b);
363  if (b) {
364  renderers->push_back(new tesseract::TessBoxTextRenderer(outputbase));
365  }
366 
367  api->GetBoolVariable("tessedit_create_txt", &b);
368  if (b || renderers->empty()) {
369  renderers->push_back(new tesseract::TessTextRenderer(outputbase));
370  }
371  }
372 
373  if (!renderers->empty()) {
374  // Since the PointerVector auto-deletes, null-out the renderers that are
375  // added to the root, and leave the root in the vector.
376  for (int r = 1; r < renderers->size(); ++r) {
377  (*renderers)[0]->insert((*renderers)[r]);
378  (*renderers)[r] = NULL;
379  }
380  }
381 }
382 
383 /**********************************************************************
384  * main()
385  *
386  **********************************************************************/
387 
388 int main(int argc, char** argv) {
389  const char* lang = "eng";
390  const char* image = NULL;
391  const char* outputbase = NULL;
392  const char* datapath = NULL;
393  bool list_langs = false;
394  bool print_parameters = false;
395  int arg_i = 1;
398  /* main() calls functions like ParseArgs which call exit().
399  * This results in memory leaks if vars_vec and vars_values are
400  * declared as auto variables (destructor is not called then). */
401  static GenericVector<STRING> vars_vec;
402  static GenericVector<STRING> vars_values;
403 
404 #ifdef NDEBUG
405  // Disable debugging and informational messages from Leptonica.
406  setMsgSeverity(L_SEVERITY_ERROR);
407 #endif
408 
409 #if defined(HAVE_TIFFIO_H) && defined(_WIN32)
410  /* Show libtiff warnings on console (not in GUI). */
411  TIFFSetWarningHandler(Win32WarningHandler);
412 #endif /* HAVE_TIFFIO_H && _WIN32 */
413 
414  ParseArgs(argc, argv, &lang, &image, &outputbase, &datapath, &list_langs,
415  &print_parameters, &vars_vec, &vars_values, &arg_i, &pagesegmode,
416  &enginemode);
417 
418  bool banner = false;
419  if (outputbase != NULL && strcmp(outputbase, "-") &&
420  strcmp(outputbase, "stdout")) {
421  banner = true;
422  }
423 
424  PERF_COUNT_START("Tesseract:main")
425 
426  // Call GlobalDawgCache here to create the global DawgCache object before
427  // the TessBaseAPI object. This fixes the order of destructor calls:
428  // first TessBaseAPI must be destructed, DawgCache must be the last object.
430 
431  // Avoid memory leak caused by auto variable when exit() is called.
433 
434  api.SetOutputName(outputbase);
435 
436  int init_failed = api.Init(datapath, lang, enginemode, &(argv[arg_i]),
437  argc - arg_i, &vars_vec, &vars_values, false);
438  if (init_failed) {
439  fprintf(stderr, "Could not initialize tesseract.\n");
440  return EXIT_FAILURE;
441  }
442 
443  SetVariablesFromCLArgs(&api, argc, argv);
444 
445  if (list_langs) {
447  return EXIT_SUCCESS;
448  }
449 
450  if (print_parameters) {
451  FILE* fout = stdout;
452  fprintf(stdout, "Tesseract parameters:\n");
453  api.PrintVariables(fout);
454  api.End();
455  return EXIT_SUCCESS;
456  }
457 
458  FixPageSegMode(&api, pagesegmode);
459 
460  if (pagesegmode == tesseract::PSM_AUTO_ONLY) {
461  int ret_val = EXIT_SUCCESS;
462 
463  Pix* pixs = pixRead(image);
464  if (!pixs) {
465  fprintf(stderr, "Cannot open input file: %s\n", image);
466  return 2;
467  }
468 
469  api.SetImage(pixs);
470 
471  tesseract::Orientation orientation;
474  float deskew_angle;
475 
476  tesseract::PageIterator* it = api.AnalyseLayout();
477  if (it) {
478  it->Orientation(&orientation, &direction, &order, &deskew_angle);
479  tprintf(
480  "Orientation: %d\nWritingDirection: %d\nTextlineOrder: %d\n"
481  "Deskew angle: %.4f\n",
482  orientation, direction, order, deskew_angle);
483  } else {
484  ret_val = EXIT_FAILURE;
485  }
486 
487  delete it;
488 
489  pixDestroy(&pixs);
490  return ret_val;
491  }
492 
493  // set in_training_mode to true when using one of these configs:
494  // ambigs.train, box.train, box.train.stderr, linebox, rebox
495  bool b = false;
496  bool in_training_mode =
497  (api.GetBoolVariable("tessedit_ambigs_training", &b) && b) ||
498  (api.GetBoolVariable("tessedit_resegment_from_boxes", &b) && b) ||
499  (api.GetBoolVariable("tessedit_make_boxes_from_boxes", &b) && b);
500 
501  // Avoid memory leak caused by auto variable when exit() is called.
503 
504  if (in_training_mode) {
505  renderers.push_back(NULL);
506  } else {
507  PreloadRenderers(&api, &renderers, pagesegmode, outputbase);
508  }
509 
510  if (!renderers.empty()) {
511  if (banner) PrintBanner();
512  bool succeed = api.ProcessPages(image, NULL, 0, renderers[0]);
513  if (!succeed) {
514  fprintf(stderr, "Error during processing.\n");
515  return EXIT_FAILURE;
516  }
517  }
518 
520 
521  return EXIT_SUCCESS;
522 }
void PrintLangsList(tesseract::TessBaseAPI *api)
void PreloadRenderers(tesseract::TessBaseAPI *api, tesseract::PointerVector< tesseract::TessResultRenderer > *renderers, tesseract::PageSegMode pagesegmode, const char *outputbase)
Orientation and script detection only.
Definition: publictypes.h:152
void PrintHelpForOEM()
void SetVariablesFromCLArgs(tesseract::TessBaseAPI *api, int argc, char **argv)
void Orientation(tesseract::Orientation *orientation, tesseract::WritingDirection *writing_direction, tesseract::TextlineOrder *textline_order, float *deskew_angle) const
void PrintHelpForPSM()
int push_back(T object)
Automatic page segmentation, but no OSD, or OCR.
Definition: publictypes.h:155
void FixPageSegMode(tesseract::TessBaseAPI *api, tesseract::PageSegMode pagesegmode)
void PrintHelpMessage(const char *program)
int main(int argc, char **argv)
void PrintBanner()
#define PERF_COUNT_START(FUNCT_NAME)
int direction(EDGEPT *point)
Definition: vecfuncs.cpp:43
#define tprintf(...)
Definition: tprintf.h:31
Definition: strngs.h:44
int size() const
Definition: genericvector.h:72
Fully automatic page segmentation, but no OSD.
Definition: publictypes.h:156
Assume a single uniform block of text. (Default.)
Definition: publictypes.h:160
static DawgCache * GlobalDawgCache()
Definition: dict.cpp:198
#define PERF_COUNT_END
void PrintVersionInfo()
static const char * Version()
Definition: baseapi.cpp:140
bool empty() const
Definition: genericvector.h:84
void PrintUsage(const char *program)
void ParseArgs(const int argc, char **argv, const char **lang, const char **image, const char **outputbase, const char **datapath, bool *list_langs, bool *print_parameters, GenericVector< STRING > *vars_vec, GenericVector< STRING > *vars_values, int *arg_i, tesseract::PageSegMode *pagesegmode, tesseract::OcrEngineMode *enginemode)