#include <renderer.h>
Renders Tesseract output into a searchable PDF.
Definition at line 186 of file renderer.h.
◆ TessPDFRenderer() [1/2]
tesseract::TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir)
Definition at line 186 of file pdfrenderer.cpp.
TessPDFRenderer(const char *outputbase, const char *datadir)
TessResultRenderer(const char *outputbase, const char *extension)
◆ TessPDFRenderer() [2/2]
tesseract::TessPDFRenderer::TessPDFRenderer(const char *outputbase, const char *datadir, bool textonly)
Definition at line 191 of file pdfrenderer.cpp.
196 textonly_ = textonly;
TessResultRenderer(const char *outputbase, const char *extension)
◆ AddImageHandler()
bool tesseract::TessPDFRenderer::AddImageHandler(TessBaseAPI *api)
[protected, virtual]
Implements tesseract::TessResultRenderer.
Definition at line 853 of file pdfrenderer.cpp.
855 char buf[kBasicBufSize];
856 char buf2[kBasicBufSize];
857 Pix *pix =
api->GetInputImage();
859 int ppi =
api->GetSourceYResolution();
860 if (!pix || ppi <= 0)
862 double width = pixGetWidth(pix) * 72.0 / ppi;
863 double height = pixGetHeight(pix) * 72.0 / ppi;
865 snprintf(buf2,
sizeof(buf2),
"/XObject << /Im1 %ld 0 R >>\n", obj_ + 2);
866 const char *xobject = (textonly_) ?
"" : buf2;
869 n = snprintf(buf,
sizeof(buf),
874 " /MediaBox [0 0 %.2f %.2f]\n" 875 " /Contents %ld 0 R\n" 879 " /ProcSet [ /PDF /Text /ImageB /ImageI /ImageC ]\n" 880 " /Font << /f-0-0 %ld 0 R >>\n" 890 if (n >=
sizeof(buf))
return false;
892 AppendPDFObject(buf);
895 char* pdftext = GetPDFTextObjects(
api, width, height);
896 long pdftext_len = strlen(pdftext);
897 unsigned char *pdftext_casted =
reinterpret_cast<unsigned char *
>(pdftext);
899 unsigned char *comp_pdftext =
900 zlibCompress(pdftext_casted, pdftext_len, &len);
901 long comp_pdftext_len = len;
902 n = snprintf(buf,
sizeof(buf),
905 " /Length %ld /Filter /FlateDecode\n" 907 "stream\n", obj_, comp_pdftext_len);
908 if (n >=
sizeof(buf)) {
910 lept_free(comp_pdftext);
914 long objsize = strlen(buf);
915 AppendData(reinterpret_cast<char *>(comp_pdftext), comp_pdftext_len);
916 objsize += comp_pdftext_len;
917 lept_free(comp_pdftext);
923 objsize += strlen(b2);
924 AppendPDFObjectDIY(objsize);
927 char *pdf_object = NULL;
928 if (!imageToPDFObj(pix,
filename, obj_, &pdf_object, &objsize)) {
932 AppendPDFObjectDIY(objsize);
void AppendString(const char *s)
void AppendData(const char *s, int len)
◆ BeginDocumentHandler()
bool tesseract::TessPDFRenderer::BeginDocumentHandler()
[protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 506 of file pdfrenderer.cpp.
507 char buf[kBasicBufSize];
510 n = snprintf(buf,
sizeof(buf),
513 0xDE, 0xAD, 0xBE, 0xEB);
514 if (n >=
sizeof(buf))
return false;
515 AppendPDFObject(buf);
518 n = snprintf(buf,
sizeof(buf),
526 if (n >=
sizeof(buf))
return false;
527 AppendPDFObject(buf);
535 n = snprintf(buf,
sizeof(buf),
538 " /BaseFont /GlyphLessFont\n" 539 " /DescendantFonts [ %ld 0 R ]\n" 540 " /Encoding /Identity-H\n" 542 " /ToUnicode %ld 0 R\n" 549 if (n >=
sizeof(buf))
return false;
550 AppendPDFObject(buf);
553 n = snprintf(buf,
sizeof(buf),
556 " /BaseFont /GlyphLessFont\n" 557 " /CIDToGIDMap %ld 0 R\n" 560 " /Ordering (Identity)\n" 561 " /Registry (Adobe)\n" 564 " /FontDescriptor %ld 0 R\n" 565 " /Subtype /CIDFontType2\n" 573 if (n >=
sizeof(buf))
return false;
574 AppendPDFObject(buf);
577 const int kCIDToGIDMapSize = 2 * (1 << 16);
578 unsigned char *cidtogidmap =
new unsigned char[kCIDToGIDMapSize];
579 for (
int i = 0; i < kCIDToGIDMapSize; i++) {
580 cidtogidmap[i] = (i % 2) ? 1 : 0;
583 unsigned char *comp =
584 zlibCompress(cidtogidmap, kCIDToGIDMapSize, &len);
585 delete[] cidtogidmap;
586 n = snprintf(buf,
sizeof(buf),
589 " /Length %lu /Filter /FlateDecode\n" 593 if (n >=
sizeof(buf)) {
598 long objsize = strlen(buf);
599 AppendData(reinterpret_cast<char *>(comp), len);
602 const char *endstream_endobj =
606 objsize += strlen(endstream_endobj);
607 AppendPDFObjectDIY(objsize);
610 "/CIDInit /ProcSet findresource begin\n" 615 " /Registry (Adobe)\n" 619 "/CMapName /Adobe-Identify-UCS def\n" 621 "1 begincodespacerange\n" 623 "endcodespacerange\n" 625 "<0000> <FFFF> <0000>\n" 628 "CMapName currentdict /CMap defineresource pop\n" 633 n = snprintf(buf,
sizeof(buf),
635 "<< /Length %lu >>\n" 639 "endobj\n", (
unsigned long) strlen(stream), stream);
640 if (n >=
sizeof(buf))
return false;
641 AppendPDFObject(buf);
644 n = snprintf(buf,
sizeof(buf),
651 " /FontBBox [ 0 0 %d %d ]\n" 652 " /FontFile2 %ld 0 R\n" 653 " /FontName /GlyphLessFont\n" 656 " /Type /FontDescriptor\n" 665 if (n >=
sizeof(buf))
return false;
666 AppendPDFObject(buf);
668 n = snprintf(buf,
sizeof(buf),
"%s/pdf.ttf", datadir_);
669 if (n >=
sizeof(buf))
return false;
670 FILE *fp = fopen(buf,
"rb");
672 tprintf(
"Can not open file \"%s\"!\n", buf);
675 fseek(fp, 0, SEEK_END);
676 long int size = ftell(fp);
677 fseek(fp, 0, SEEK_SET);
678 char *buffer =
new char[size];
679 if (fread(buffer, 1, size, fp) != size) {
686 n = snprintf(buf,
sizeof(buf),
692 "stream\n", size, size);
693 if (n >=
sizeof(buf)) {
698 objsize = strlen(buf);
703 objsize += strlen(endstream_endobj);
704 AppendPDFObjectDIY(objsize);
void AppendString(const char *s)
void AppendData(const char *s, int len)
◆ EndDocumentHandler()
bool tesseract::TessPDFRenderer::EndDocumentHandler()
[protected, virtual]
Reimplemented from tesseract::TessResultRenderer.
Definition at line 939 of file pdfrenderer.cpp.
941 char buf[kBasicBufSize];
950 const long int kPagesObjectNumber = 2;
951 offsets_[kPagesObjectNumber] = offsets_.
back();
952 n = snprintf(buf,
sizeof(buf),
956 " /Kids [ ", kPagesObjectNumber);
957 if (n >=
sizeof(buf))
return false;
959 size_t pages_objsize = strlen(buf);
960 for (
size_t i = 0; i < pages_.
size(); i++) {
961 n = snprintf(buf,
sizeof(buf),
962 "%ld 0 R ", pages_[i]);
963 if (n >=
sizeof(buf))
return false;
965 pages_objsize += strlen(buf);
967 n = snprintf(buf,
sizeof(buf),
971 "endobj\n", pages_.
size());
972 if (n >=
sizeof(buf))
return false;
974 pages_objsize += strlen(buf);
975 offsets_.
back() += pages_objsize;
978 STRING utf16_title =
"FEFF";
981 char utf16[kMaxBytesPerCodepoint];
982 for (
int i = 0; i < unicodes.
length(); i++) {
983 int code = unicodes[i];
985 utf16_title += utf16;
989 char* datestr = l_getFormattedDate();
990 n = snprintf(buf,
sizeof(buf),
993 " /Producer (Tesseract %s)\n" 994 " /CreationDate (D:%s)\n" 1000 if (n >=
sizeof(buf))
return false;
1001 AppendPDFObject(buf);
1002 n = snprintf(buf,
sizeof(buf),
1005 "0000000000 65535 f \n", obj_);
1006 if (n >=
sizeof(buf))
return false;
1008 for (
int i = 1; i < obj_; i++) {
1009 n = snprintf(buf,
sizeof(buf),
"%010ld 00000 n \n", offsets_[i]);
1010 if (n >=
sizeof(buf))
return false;
1013 n = snprintf(buf,
sizeof(buf),
1027 if (n >=
sizeof(buf))
return false;
bool CodepointToUtf16be(int code, char utf16[kMaxBytesPerCodepoint])
static bool UTF8ToUnicode(const char *utf8_str, GenericVector< int > *unicodes)
const char * title() const
const char * c_str() const
void AppendString(const char *s)
#define TESSERACT_VERSION_STR
The documentation for this class was generated from the following files: