This commit is contained in:
Jonathan Cook
2019-10-23 15:01:44 +02:00
parent db85c8f275
commit 684ec0d2e3
20486 changed files with 1642483 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
/target/
+7
View File
@@ -0,0 +1,7 @@
## PDF
This module contains articles about PDF files.
### Relevant Articles:
- [PDF Conversions in Java](https://www.baeldung.com/pdf-conversions-java)
- [Creating PDF Files in Java](https://www.baeldung.com/java-pdf-creation)
+85
View File
@@ -0,0 +1,85 @@
<?xml version="1.0"?>
<project xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd" xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<modelVersion>4.0.0</modelVersion>
<artifactId>pdf</artifactId>
<name>pdf</name>
<url>http://maven.apache.org</url>
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox-tools.version}</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>net.sf.cssbox</groupId>
<artifactId>pdf2dom</artifactId>
<version>${pdf2dom.version}</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>${itextpdf.version}</version>
</dependency>
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>${xmlworker.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi-scratchpad.version}</version>
</dependency>
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>batik-transcoder</artifactId>
<version>${batik-transcoder.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi-ooxml.version}</version>
</dependency>
</dependencies>
<build>
<finalName>pdf</finalName>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
</build>
<properties>
<pdfbox-tools.version>2.0.3</pdfbox-tools.version>
<pdf2dom.version>1.6</pdf2dom.version>
<itextpdf.version>5.5.10</itextpdf.version>
<xmlworker.version>5.5.10</xmlworker.version>
<poi-scratchpad.version>3.15</poi-scratchpad.version>
<batik-transcoder.version>1.8</batik-transcoder.version>
<poi-ooxml.version>3.15</poi-ooxml.version>
</properties>
</project>
@@ -0,0 +1,52 @@
package com.baeldung.pdf;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.fit.pdfdom.PDFDomTree;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
public class PDF2HTMLExample {
private static final String PDF = "src/main/resources/pdf.pdf";
private static final String HTML = "src/main/resources/html.html";
public static void main(String[] args) {
try {
generateHTMLFromPDF(PDF);
generatePDFFromHTML(HTML);
} catch (IOException | ParserConfigurationException | DocumentException e) {
e.printStackTrace();
}
}
private static void generateHTMLFromPDF(String filename) throws ParserConfigurationException, IOException {
PDDocument pdf = PDDocument.load(new File(filename));
PDFDomTree parser = new PDFDomTree();
Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
parser.writeText(pdf, output);
output.close();
if (pdf != null) {
pdf.close();
}
}
private static void generatePDFFromHTML(String filename) throws ParserConfigurationException, IOException, DocumentException {
Document document = new Document();
PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream("src/output/html.pdf"));
document.open();
XMLWorkerHelper.getInstance().parseXHtml(writer, document, new FileInputStream(filename));
document.close();
}
}
@@ -0,0 +1,62 @@
package com.baeldung.pdf;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfWriter;
public class PDF2ImageExample {
private static final String PDF = "src/main/resources/pdf.pdf";
private static final String JPG = "http://cdn2.baeldung.netdna-cdn.com/wp-content/uploads/2016/05/baeldung-rest-widget-main-1.2.0";
private static final String GIF = "https://media.giphy.com/media/l3V0x6kdXUW9M4ONq/giphy";
public static void main(String[] args) {
try {
generateImageFromPDF(PDF, "png");
generateImageFromPDF(PDF, "jpeg");
generateImageFromPDF(PDF, "gif");
generatePDFFromImage(JPG, "jpg");
generatePDFFromImage(GIF, "gif");
} catch (IOException | DocumentException e) {
e.printStackTrace();
}
}
private static void generateImageFromPDF(String filename, String extension) throws IOException {
PDDocument document = PDDocument.load(new File(filename));
PDFRenderer pdfRenderer = new PDFRenderer(document);
for (int page = 0; page < document.getNumberOfPages(); ++page) {
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
}
document.close();
}
private static void generatePDFFromImage(String filename, String extension)
throws IOException, BadElementException, DocumentException {
Document document = new Document();
String input = filename + "." + extension;
String output = "src/output/" + extension + ".pdf";
FileOutputStream fos = new FileOutputStream(output);
PdfWriter writer = PdfWriter.getInstance(document, fos);
writer.open();
document.open();
document.add(Image.getInstance((new URL(input))));
document.close();
writer.close();
}
}
@@ -0,0 +1,84 @@
package com.baeldung.pdf;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfWriter;
public class PDF2TextExample {
private static final String PDF = "src/main/resources/pdf.pdf";
private static final String TXT = "src/main/resources/txt.txt";
public static void main(String[] args) {
try {
generateTxtFromPDF(PDF);
generatePDFFromTxt(TXT);
} catch (IOException | DocumentException e) {
e.printStackTrace();
}
}
private static void generateTxtFromPDF(String filename) throws IOException {
File f = new File(filename);
String parsedText;
PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
parser.parse();
COSDocument cosDoc = parser.getDocument();
PDFTextStripper pdfStripper = new PDFTextStripper();
PDDocument pdDoc = new PDDocument(cosDoc);
parsedText = pdfStripper.getText(pdDoc);
if (cosDoc != null)
cosDoc.close();
if (pdDoc != null)
pdDoc.close();
PrintWriter pw = new PrintWriter("src/output/pdf.txt");
pw.print(parsedText);
pw.close();
}
private static void generatePDFFromTxt(String filename) throws IOException, DocumentException {
Document pdfDoc = new Document(PageSize.A4);
PdfWriter.getInstance(pdfDoc, new FileOutputStream("src/output/txt.pdf"))
.setPdfVersion(PdfWriter.PDF_VERSION_1_7);
pdfDoc.open();
Font myfont = new Font();
myfont.setStyle(Font.NORMAL);
myfont.setSize(11);
pdfDoc.add(new Paragraph("\n"));
BufferedReader br = new BufferedReader(new FileReader(filename));
String strLine;
while ((strLine = br.readLine()) != null) {
Paragraph para = new Paragraph(strLine + "\n", myfont);
para.setAlignment(Element.ALIGN_JUSTIFIED);
pdfDoc.add(para);
}
pdfDoc.close();
br.close();
}
}
@@ -0,0 +1,50 @@
package com.baeldung.pdf;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.xwpf.usermodel.BreakType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
public class PDF2WordExample {
private static final String FILENAME = "src/main/resources/pdf.pdf";
public static void main(String[] args) {
try {
generateDocFromPDF(FILENAME);
} catch (IOException e) {
e.printStackTrace();
}
}
private static void generateDocFromPDF(String filename) throws IOException {
XWPFDocument doc = new XWPFDocument();
String pdf = filename;
PdfReader reader = new PdfReader(pdf);
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
for (int i = 1; i <= reader.getNumberOfPages(); i++) {
TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
String text = strategy.getResultantText();
XWPFParagraph p = doc.createParagraph();
XWPFRun run = p.createRun();
run.setText(text);
run.addBreak(BreakType.PAGE);
}
FileOutputStream out = new FileOutputStream("src/output/pdf.docx");
doc.write(out);
out.close();
reader.close();
doc.close();
}
}
@@ -0,0 +1,77 @@
package com.baeldung.pdf;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.Element;
import com.itextpdf.text.Image;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
public class PDFSampleMain {
public static void main(String[] args) {
try {
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream("iTextTable.pdf"));
document.open();
PdfPTable table = new PdfPTable(3);
addTableHeader(table);
addRows(table);
addCustomRows(table);
document.add(table);
document.close();
} catch (Exception e) {
e.printStackTrace();
}
}
private static void addTableHeader(PdfPTable table) {
Stream.of("column header 1", "column header 2", "column header 3")
.forEach(columnTitle -> {
PdfPCell header = new PdfPCell();
header.setBackgroundColor(BaseColor.LIGHT_GRAY);
header.setBorderWidth(2);
header.setPhrase(new Phrase(columnTitle));
table.addCell(header);
});
}
private static void addRows(PdfPTable table) {
table.addCell("row 1, col 1");
table.addCell("row 1, col 2");
table.addCell("row 1, col 3");
}
private static void addCustomRows(PdfPTable table) throws URISyntaxException, BadElementException, IOException {
Path path = Paths.get(ClassLoader.getSystemResource("Java_logo.png").toURI());
Image img = Image.getInstance(path.toAbsolutePath().toString());
img.scalePercent(10);
PdfPCell imageCell = new PdfPCell(img);
table.addCell(imageCell);
PdfPCell horizontalAlignCell = new PdfPCell(new Phrase("row 2, col 2"));
horizontalAlignCell.setHorizontalAlignment(Element.ALIGN_CENTER);
table.addCell(horizontalAlignCell);
PdfPCell verticalAlignCell = new PdfPCell(new Phrase("row 2, col 3"));
verticalAlignCell.setVerticalAlignment(Element.ALIGN_BOTTOM);
table.addCell(verticalAlignCell);
}
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

+53
View File
@@ -0,0 +1,53 @@
<!DOCTYPE html>
<html>
<head>
<title>A very simple webpage</title>
</head>
<body>
<h1>A very simple webpage. This is an "h1" level header.</h1>
<h2>This is a level h2 header.</h2>
<h6>This is a level h6 header. Pretty small!</h6>
<p>This is a standard paragraph.</p>
<p align=center>Now I've aligned it in the center of the screen.</p>
<p align=right>Now aligned to the right</p>
<p><b>Bold text</b></p>
<p><strong>Strongly emphasized text</strong> Can you tell the difference vs. bold?</p>
<p><i>Italics</i></p>
<p><em>Emphasized text</em> Just like Italics!</p>
<h2>How about a nice ordered list!</h2>
<ol>
<li>This little piggy went to market</li>
<li>This little piggy went to SB228 class</li>
<li>This little piggy went to an expensive restaurant in Downtown Palo Alto</li>
<li>This little piggy ate too much at Indian Buffet.</li>
<li>This little piggy got lost</li>
</ol>
<h2>Unordered list</h2>
<ul>
<li>First element</li>
<li>Second element</li>
<li>Third element</li>
</ul>
<p>And finally, how about some</p><a href="http://www.google.com/">Links?</a>
<p>Remember, you can view the HTMl code from this or any other page by using the "View Page Source" command of your browser.</p>
</body>
</html>
+13
View File
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT" />
</root>
</configuration>
Binary file not shown.
+3
View File
@@ -0,0 +1,3 @@
Test
Text
Test TEST