tesseract  3.05.02
renderer.cpp
Go to the documentation of this file.
1 // File: renderer.cpp
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config_auto.h"
20 #endif
21 
22 #include <string.h>
23 #include "baseapi.h"
24 #include "genericvector.h"
25 #include "renderer.h"
26 
27 namespace tesseract {
28 
29 /**********************************************************************
30  * Base Renderer interface implementation
31  **********************************************************************/
33  const char* extension)
34  : file_extension_(extension),
35  title_(""), imagenum_(-1),
36  fout_(stdout),
37  next_(NULL),
38  happy_(true) {
39  if (strcmp(outputbase, "-") && strcmp(outputbase, "stdout")) {
40  STRING outfile = STRING(outputbase) + STRING(".") + STRING(file_extension_);
41  fout_ = fopen(outfile.string(), "wb");
42  if (fout_ == NULL) {
43  happy_ = false;
44  }
45  }
46 }
47 
49  if (fout_ != NULL) {
50  if (fout_ != stdout)
51  fclose(fout_);
52  else
53  clearerr(fout_);
54  }
55  delete next_;
56 }
57 
59  if (next == NULL) return;
60 
61  TessResultRenderer* remainder = next_;
62  next_ = next;
63  if (remainder) {
64  while (next->next_ != NULL) {
65  next = next->next_;
66  }
67  next->next_ = remainder;
68  }
69 }
70 
71 bool TessResultRenderer::BeginDocument(const char* title) {
72  if (!happy_) return false;
73  title_ = title;
74  imagenum_ = -1;
75  bool ok = BeginDocumentHandler();
76  if (next_) {
77  ok = next_->BeginDocument(title) && ok;
78  }
79  return ok;
80 }
81 
83  if (!happy_) return false;
84  ++imagenum_;
85  bool ok = AddImageHandler(api);
86  if (next_) {
87  ok = next_->AddImage(api) && ok;
88  }
89  return ok;
90 }
91 
93  if (!happy_) return false;
94  bool ok = EndDocumentHandler();
95  if (next_) {
96  ok = next_->EndDocument() && ok;
97  }
98  return ok;
99 }
100 
101 void TessResultRenderer::AppendString(const char* s) {
102  AppendData(s, strlen(s));
103 }
104 
105 void TessResultRenderer::AppendData(const char* s, int len) {
106  int n = fwrite(s, 1, len, fout_);
107  if (n != len) happy_ = false;
108 }
109 
111  return happy_;
112 }
113 
115  return happy_;
116 }
117 
118 
119 /**********************************************************************
120  * UTF8 Text Renderer interface implementation
121  **********************************************************************/
122 TessTextRenderer::TessTextRenderer(const char *outputbase)
123  : TessResultRenderer(outputbase, "txt") {
124 }
125 
127  char* utf8 = api->GetUTF8Text();
128  if (utf8 == NULL) {
129  return false;
130  }
131 
132  AppendString(utf8);
133  delete[] utf8;
134 
135  bool pageBreak = false;
136  api->GetBoolVariable("include_page_breaks", &pageBreak);
137  const char* pageSeparator = api->GetStringVariable("page_separator");
138  if (pageBreak) {
139  AppendString(pageSeparator);
140  }
141 
142  return true;
143 }
144 
145 /**********************************************************************
146  * HOcr Text Renderer interface implementation
147  **********************************************************************/
148 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase)
149  : TessResultRenderer(outputbase, "hocr") {
150  font_info_ = false;
151 }
152 
153 TessHOcrRenderer::TessHOcrRenderer(const char *outputbase, bool font_info)
154  : TessResultRenderer(outputbase, "hocr") {
155  font_info_ = font_info;
156 }
157 
159  AppendString(
160  "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
161  "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"
162  " \"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"
163  "<html xmlns=\"http://www.w3.org/1999/xhtml\" xml:lang=\"en\" "
164  "lang=\"en\">\n <head>\n <title>");
165  AppendString(title());
166  AppendString(
167  "</title>\n"
168  "<meta http-equiv=\"Content-Type\" content=\"text/html;"
169  "charset=utf-8\" />\n"
170  " <meta name='ocr-system' content='tesseract " TESSERACT_VERSION_STR
171  "' />\n"
172  " <meta name='ocr-capabilities' content='ocr_page ocr_carea ocr_par"
173  " ocr_line ocrx_word");
174  if (font_info_)
175  AppendString(
176  " ocrp_lang ocrp_dir ocrp_font ocrp_fsize ocrp_wconf");
177  AppendString(
178  "'/>\n"
179  "</head>\n<body>\n");
180 
181  return true;
182 }
183 
185  AppendString(" </body>\n</html>\n");
186 
187  return true;
188 }
189 
191  char* hocr = api->GetHOCRText(imagenum());
192  if (hocr == NULL) return false;
193 
194  AppendString(hocr);
195  delete[] hocr;
196 
197  return true;
198 }
199 
200 /**********************************************************************
201  * TSV Text Renderer interface implementation
202  **********************************************************************/
203 TessTsvRenderer::TessTsvRenderer(const char* outputbase)
204  : TessResultRenderer(outputbase, "tsv") {
205  font_info_ = false;
206 }
207 
208 TessTsvRenderer::TessTsvRenderer(const char* outputbase, bool font_info)
209  : TessResultRenderer(outputbase, "tsv") {
210  font_info_ = font_info;
211 }
212 
214  // Output TSV column headings
215  AppendString(
216  "level\tpage_num\tblock_num\tpar_num\tline_num\tword_"
217  "num\tleft\ttop\twidth\theight\tconf\ttext\n");
218  return true;
219 }
220 
221 bool TessTsvRenderer::EndDocumentHandler() { return true; }
222 
224  char* tsv = api->GetTSVText(imagenum());
225  if (tsv == NULL) return false;
226 
227  AppendString(tsv);
228  delete[] tsv;
229 
230  return true;
231 }
232 
233 /**********************************************************************
234  * UNLV Text Renderer interface implementation
235  **********************************************************************/
236 TessUnlvRenderer::TessUnlvRenderer(const char *outputbase)
237  : TessResultRenderer(outputbase, "unlv") {
238 }
239 
241  char* unlv = api->GetUNLVText();
242  if (unlv == NULL) return false;
243 
244  AppendString(unlv);
245  delete[] unlv;
246 
247  return true;
248 }
249 
250 /**********************************************************************
251  * BoxText Renderer interface implementation
252  **********************************************************************/
254  : TessResultRenderer(outputbase, "box") {
255 }
256 
258  char* text = api->GetBoxText(imagenum());
259  if (text == NULL) return false;
260 
261  AppendString(text);
262  delete[] text;
263 
264  return true;
265 }
266 
267 /**********************************************************************
268  * Osd Text Renderer interface implementation
269  **********************************************************************/
270 TessOsdRenderer::TessOsdRenderer(const char* outputbase)
271  : TessResultRenderer(outputbase, "osd") {}
272 
274  char* osd = api->GetOsdText(imagenum());
275  if (osd == NULL) return false;
276 
277  AppendString(osd);
278  delete[] osd;
279 
280  return true;
281 }
282 
283 } // namespace tesseract
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:223
virtual bool EndDocumentHandler()
Definition: renderer.cpp:221
void insert(TessResultRenderer *next)
Definition: renderer.cpp:58
virtual bool EndDocumentHandler()
Definition: renderer.cpp:184
bool AddImage(TessBaseAPI *api)
Definition: renderer.cpp:82
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:190
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:257
bool BeginDocument(const char *title)
Definition: renderer.cpp:71
virtual bool AddImageHandler(TessBaseAPI *api)=0
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:126
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:110
TessTsvRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:208
TessResultRenderer * next()
Definition: renderer.h:55
TessBoxTextRenderer(const char *outputbase)
Definition: renderer.cpp:253
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:240
const char * string() const
Definition: strngs.cpp:201
TessOsdRenderer(const char *outputbase)
Definition: renderer.cpp:270
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:213
TessUnlvRenderer(const char *outputbase)
Definition: renderer.cpp:236
const char * title() const
Definition: renderer.h:81
virtual bool EndDocumentHandler()
Definition: renderer.cpp:114
virtual bool AddImageHandler(TessBaseAPI *api)
Definition: renderer.cpp:273
Definition: strngs.h:44
void AppendString(const char *s)
Definition: renderer.cpp:101
virtual bool BeginDocumentHandler()
Definition: renderer.cpp:158
void AppendData(const char *s, int len)
Definition: renderer.cpp:105
TessHOcrRenderer(const char *outputbase, bool font_info)
Definition: renderer.cpp:153
#define TESSERACT_VERSION_STR
Definition: baseapi.h:23
TessResultRenderer(const char *outputbase, const char *extension)
Definition: renderer.cpp:32
TessTextRenderer(const char *outputbase)
Definition: renderer.cpp:122