/*
 * This class is part of the book "iText in Action - 2nd Edition"
 * written by Bruno Lowagie (ISBN: 9781935182610)
 * For more info, go to: http://itextpdf.com/examples/
 * This example only works with the AGPL version of iText.
 */
package part4.chapter15;

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;

import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.LocationTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;

/**
 * Extracts the text of every page of a PDF and writes it to a plain text
 * file, processing each page with a {@link LocationTextExtractionStrategy}.
 */
public class ExtractPageContentSorted1 {

    /** The original PDF that will be parsed. */
    public static final String PREFACE = "resources/pdfs/preface.pdf";

    /** The resulting text file. */
    public static final String RESULT = "results/part4/chapter15/preface_sorted1.txt";

    /**
     * Parses a PDF to a plain text file.
     * <p>
     * Pages are processed in order, starting at page 1; the text extracted
     * from each page is written followed by a line separator. The reader and
     * the writer are closed even when parsing or writing fails.
     *
     * @param pdf the path of the original PDF
     * @param txt the path of the resulting text file
     * @throws IOException if the PDF cannot be read, the text file cannot be
     *         opened, or writing the extracted text fails
     */
    public void parsePdf(String pdf, String txt) throws IOException {
        PdfReader reader = new PdfReader(pdf);
        try {
            PdfReaderContentParser parser = new PdfReaderContentParser(reader);
            // NOTE: PrintWriter(OutputStream) encodes with the platform default
            // charset; kept as-is so the produced file does not change.
            PrintWriter out = new PrintWriter(new FileOutputStream(txt));
            try {
                int pageCount = reader.getNumberOfPages();
                for (int page = 1; page <= pageCount; page++) {
                    // A fresh strategy per page so no state carries over
                    // from the previous page.
                    TextExtractionStrategy strategy =
                            parser.processContent(page, new LocationTextExtractionStrategy());
                    out.println(strategy.getResultantText());
                }
                // PrintWriter never throws on write errors; checkError()
                // flushes and reports whether any write has failed.
                if (out.checkError()) {
                    throw new IOException("Failed to write extracted text to " + txt);
                }
            } finally {
                out.close();
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Main method: parses {@link #PREFACE} into {@link #RESULT}.
     *
     * @param args no arguments needed
     * @throws IOException if parsing the PDF or writing the text file fails
     */
    public static void main(String[] args) throws IOException {
        new ExtractPageContentSorted1().parsePdf(PREFACE, RESULT);
    }
}