Fixes to PDF article

This commit is contained in:
michal_aibin
2016-11-05 21:58:25 +01:00
parent c29601db41
commit 9332eb5210
3 changed files with 23 additions and 49 deletions
@@ -23,17 +23,13 @@ public class PDF2HTMLExample {
}
private static void generateHTMLFromPDF(String filename) throws ParserConfigurationException, IOException {
try {
PDDocument pdf = PDDocument.load(new File(filename));
PDFDomTree parser = new PDFDomTree();
Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
parser.writeText(pdf, output);
output.close();
if (pdf != null) {
pdf.close();
}
} catch (IOException e) {
e.printStackTrace();
PDDocument pdf = PDDocument.load(new File(filename));
PDFDomTree parser = new PDFDomTree();
Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
parser.writeText(pdf, output);
output.close();
if (pdf != null) {
pdf.close();
}
}
}
@@ -23,30 +23,26 @@ public class PDF2TextExample {
}
private static void generateTxtFromPDF(String filename) throws IOException {
try {
File f = new File(filename);
String parsedText;
PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
parser.parse();
File f = new File(filename);
String parsedText;
PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
parser.parse();
COSDocument cosDoc = parser.getDocument();
COSDocument cosDoc = parser.getDocument();
PDFTextStripper pdfStripper = new PDFTextStripper();
PDDocument pdDoc = new PDDocument(cosDoc);
PDFTextStripper pdfStripper = new PDFTextStripper();
PDDocument pdDoc = new PDDocument(cosDoc);
parsedText = pdfStripper.getText(pdDoc);
parsedText = pdfStripper.getText(pdDoc);
if (cosDoc != null)
cosDoc.close();
if (pdDoc != null)
pdDoc.close();
PrintWriter pw = new PrintWriter("src/output/pdf.txt");
pw.print(parsedText);
pw.close();
} catch (Exception e) {
e.printStackTrace();
}
if (cosDoc != null)
cosDoc.close();
if (pdDoc != null)
pdDoc.close();
PrintWriter pw = new PrintWriter("src/output/pdf.txt");
pw.print(parsedText);
pw.close();
}
}