tesseract  3.05.02
renderer.h
Go to the documentation of this file.
1 // File: renderer.h
3 // Description: Rendering interface to inject into TessBaseAPI
4 //
5 // (C) Copyright 2011, Google Inc.
6 // Licensed under the Apache License, Version 2.0 (the "License");
7 // you may not use this file except in compliance with the License.
8 // You may obtain a copy of the License at
9 // http://www.apache.org/licenses/LICENSE-2.0
10 // Unless required by applicable law or agreed to in writing, software
11 // distributed under the License is distributed on an "AS IS" BASIS,
12 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 // See the License for the specific language governing permissions and
14 // limitations under the License.
15 //
17 
18 #ifndef TESSERACT_API_RENDERER_H_
19 #define TESSERACT_API_RENDERER_H_
20 
21 // To avoid collision with other typenames include the ABSOLUTE MINIMUM
22 // complexity of includes here. Use forward declarations wherever possible
23 // and hide includes of complex types in baseapi.cpp.
24 #include "genericvector.h"
25 #include "platform.h"
26 #include "publictypes.h"
27 
28 namespace tesseract {
29 
30 class TessBaseAPI;
31 
46  public:
47  virtual ~TessResultRenderer();
48 
49  // Takes ownership of pointer so must be new'd instance.
50  // Renderers aren't ordered, but this appends the sequences of the next
51  // parameter and the existing next(). The renderers should be unique across both lists.
52  void insert(TessResultRenderer* next);
53 
54  // Returns the next renderer or NULL.
55  TessResultRenderer* next() { return next_; }
56 
62  bool BeginDocument(const char* title);
63 
72  bool AddImage(TessBaseAPI* api);
73 
78  bool EndDocument();
79 
80  const char* file_extension() const { return file_extension_; }
81  const char* title() const { return title_.c_str(); }
82 
92  int imagenum() const { return imagenum_; }
93 
94  protected:
105  TessResultRenderer(const char *outputbase,
106  const char* extension);
107 
108  // Hook for specialized handling in BeginDocument()
109  virtual bool BeginDocumentHandler();
110 
111  // This must be overridden to render the OCR'd results
112  virtual bool AddImageHandler(TessBaseAPI* api) = 0;
113 
114  // Hook for specialized handling in EndDocument()
115  virtual bool EndDocumentHandler();
116 
117  // Renderers can call this to append '\0' terminated strings into
118  // the output string returned by GetOutput.
119  // This method will grow the output buffer if needed.
120  void AppendString(const char* s);
121 
122  // Renderers can call this to append binary byte sequences into
123  // the output string returned by GetOutput. Note that s is not necessarily
124  // '\0' terminated (and can contain '\0' within it).
125  // This method will grow the output buffer if needed.
126  void AppendData(const char* s, int len);
127 
128  private:
129  const char* file_extension_; // standard extension for generated output
130  STRING title_; // title of document being rendered
131  int imagenum_; // index of last image added
132 
133  FILE* fout_; // output file pointer
134  TessResultRenderer* next_; // Can link multiple renderers together
135  bool happy_; // I get grumpy when the disk fills up, etc.
136 };
137 
142  public:
143  explicit TessTextRenderer(const char *outputbase);
144 
145  protected:
146  virtual bool AddImageHandler(TessBaseAPI* api);
147 };
148 
153  public:
154  explicit TessHOcrRenderer(const char *outputbase, bool font_info);
155  explicit TessHOcrRenderer(const char *outputbase);
156 
157  protected:
158  virtual bool BeginDocumentHandler();
159  virtual bool AddImageHandler(TessBaseAPI* api);
160  virtual bool EndDocumentHandler();
161 
162  private:
163  bool font_info_; // whether to print font information
164 };
165 
170  public:
171  explicit TessTsvRenderer(const char* outputbase, bool font_info);
172  explicit TessTsvRenderer(const char* outputbase);
173 
174  protected:
175  virtual bool BeginDocumentHandler();
176  virtual bool AddImageHandler(TessBaseAPI* api);
177  virtual bool EndDocumentHandler();
178 
179  private:
180  bool font_info_; // whether to print font information
181 };
182 
187  public:
188  // datadir is the location of the TESSDATA. We need it because
189  // we load a custom PDF font from this location.
190  TessPDFRenderer(const char* outputbase, const char* datadir);
191  TessPDFRenderer(const char* outputbase, const char* datadir, bool textonly);
192 
193  protected:
194  virtual bool BeginDocumentHandler();
195  virtual bool AddImageHandler(TessBaseAPI* api);
196  virtual bool EndDocumentHandler();
197 
198  private:
199  // We don't want to have every image in memory at once,
200  // so we store some metadata as we go along producing
201  // PDFs one page at a time. At the end, that metadata is
202  // used to make everything that isn't easily handled in a
203  // streaming fashion.
204  long int obj_; // counter for PDF objects
205  GenericVector<long int> offsets_; // offset of every PDF object in bytes
206  GenericVector<long int> pages_; // object number for every /Page object
207  const char *datadir_; // where to find the custom font
208  bool textonly_; // skip images if set
209  // Bookkeeping only. DIY = Do It Yourself.
210  void AppendPDFObjectDIY(size_t objectsize);
211  // Bookkeeping + emit data.
212  void AppendPDFObject(const char *data);
213  // Create the /Contents object for an entire page.
214  char* GetPDFTextObjects(TessBaseAPI* api, double width, double height);
215  // Turn an image into a PDF object. Only transcode if we have to.
216  static bool imageToPDFObj(Pix *pix, char *filename, long int objnum,
217  char **pdf_object, long int *pdf_object_size);
218 };
219 
220 
225  public:
226  explicit TessUnlvRenderer(const char *outputbase);
227 
228  protected:
229  virtual bool AddImageHandler(TessBaseAPI* api);
230 };
231 
236  public:
237  explicit TessBoxTextRenderer(const char *outputbase);
238 
239  protected:
240  virtual bool AddImageHandler(TessBaseAPI* api);
241 };
242 
247  public:
248  explicit TessOsdRenderer(const char* outputbase);
249 
250  protected:
251  virtual bool AddImageHandler(TessBaseAPI* api);
252 };
253 
254 } // namespace tesseract.
255 
256 #endif // TESSERACT_API_RENDERER_H_
void insert(LIST list, void *node)
Definition: oldlist.cpp:215
struct TessUnlvRenderer TessUnlvRenderer
Definition: capi.h:84
struct TessPDFRenderer TessPDFRenderer
Definition: capi.h:83
struct TessBaseAPI TessBaseAPI
Definition: capi.h:86
#define TESS_API
Definition: platform.h:81
struct TessTextRenderer TessTextRenderer
Definition: capi.h:81
TessResultRenderer * next()
Definition: renderer.h:55
struct TessHOcrRenderer TessHOcrRenderer
Definition: capi.h:82
const char * file_extension() const
Definition: renderer.h:80
const char * title() const
Definition: renderer.h:81
Definition: strngs.h:44
struct TessResultRenderer TessResultRenderer
Definition: capi.h:80
struct TessBoxTextRenderer TessBoxTextRenderer
Definition: capi.h:85