JAVA-29281 Create new module Text Processing Libraries Modules (#15479)

This commit is contained in:
anuragkumawat
2023-12-31 08:37:40 -08:00
committed by GitHub
parent 1768eee09f
commit 76bde3ff46
78 changed files with 208 additions and 189 deletions
@@ -0,0 +1,3 @@
## Text Processing Libraries
This module groups the submodules that demonstrate text processing libraries.
@@ -0,0 +1,7 @@
## ANTLR
This module contains articles about ANTLR
### Relevant Articles:
- [Java with ANTLR](https://www.baeldung.com/java-antlr)
@@ -0,0 +1,63 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the "antlr" module: generates parsers from ANTLR4 grammars and compiles them. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>antlr</artifactId>
<name>antlr</name>
<!-- groupId and version are inherited from the parent aggregator. -->
<parent>
<groupId>com.baeldung</groupId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<dependencies>
<!-- Runtime library; shares ${antlr.version} with the code-generation plugin below. -->
<dependency>
<groupId>org.antlr</groupId>
<artifactId>antlr4-runtime</artifactId>
<version>${antlr.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Runs the antlr4 goal to generate sources from the grammars. -->
<plugin>
<groupId>org.antlr</groupId>
<artifactId>antlr4-maven-plugin</artifactId>
<version>${antlr.version}</version>
<executions>
<execution>
<goals>
<goal>antlr4</goal>
</goals>
</execution>
</executions>
</plugin>
<!-- Registers the generated-sources directory as an additional source root. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>build-helper-maven-plugin</artifactId>
<version>${mojo.version}</version>
<executions>
<execution>
<phase>generate-sources</phase>
<goals>
<goal>add-source</goal>
</goals>
<configuration>
<sources>
<source>${basedir}/target/generated-sources/antlr4</source>
</sources>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<properties>
<!-- Used for both antlr4-runtime and antlr4-maven-plugin so the two stay in step. -->
<antlr.version>4.7.1</antlr.version>
<!-- Version of build-helper-maven-plugin. -->
<mojo.version>3.0.0</mojo.version>
</properties>
</project>
@@ -0,0 +1,16 @@
// Grammar for log files made of lines shaped like: <DATE> <TIME> <level> <message><line break>
grammar Log;
// Parser rules -------------------------------------------------------------
// A log consists of one or more entries.
log : entry+;
// One entry: timestamp, level and message separated by single spaces, ended by a line break.
entry : timestamp ' ' level ' ' message CRLF;
// Date and time separated by a single space.
timestamp : DATE ' ' TIME;
// The only accepted level keywords.
level : 'ERROR' | 'INFO' | 'DEBUG';
// One or more words and/or spaces.
message : (TEXT | ' ')+;
// Lexer fragments (building blocks, never emitted as tokens) -----------------
fragment DIGIT : [0-9];
fragment TWODIGIT : DIGIT DIGIT;
fragment LETTER : [A-Za-z];
// Lexer rules ----------------------------------------------------------------
// Four digits, '-', three letters, '-', two digits (e.g. 2018-May-05).
DATE : TWODIGIT TWODIGIT '-' LETTER LETTER LETTER '-' TWODIGIT;
// Three two-digit groups separated by ':' (e.g. 14:20:24).
TIME : TWODIGIT ':' TWODIGIT ':' TWODIGIT;
// A run of letters.
TEXT : LETTER+;
// Accepts "\r\n", "\n" or a lone "\r".
CRLF : '\r'? '\n' | '\r';
@@ -0,0 +1,28 @@
package com.baeldung.antlr.java;
import com.baeldung.antlr.Java8BaseListener;
import com.baeldung.antlr.Java8Parser;
import org.antlr.v4.runtime.tree.TerminalNode;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * Parse-tree listener that records an error message for every method declarator whose
 * identifier begins with an upper-case character.
 */
public class UppercaseMethodListener extends Java8BaseListener {

    /** Messages collected so far, in the order the offending declarators were entered. */
    private List<String> errors = new ArrayList<>();

    /**
     * Checks the first character of the declared method name and records an error message
     * when it is upper case.
     *
     * @param ctx context of the method declarator being entered
     */
    @Override
    public void enterMethodDeclarator(Java8Parser.MethodDeclaratorContext ctx) {
        String identifier = ctx.Identifier().getText();
        boolean startsWithUpperCase = Character.isUpperCase(identifier.charAt(0));
        if (!startsWithUpperCase) {
            return;
        }
        errors.add(String.format("Method %s is uppercased!", identifier));
    }

    /**
     * @return an unmodifiable view of the error messages collected so far
     */
    public List<String> getErrors() {
        return Collections.unmodifiableList(errors);
    }
}
@@ -0,0 +1,51 @@
package com.baeldung.antlr.log;
import com.baeldung.antlr.LogBaseListener;
import com.baeldung.antlr.LogParser;
import com.baeldung.antlr.log.model.LogLevel;
import com.baeldung.antlr.log.model.LogEntry;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Locale;
/**
 * Parse-tree listener that turns each {@code entry} of the Log grammar into a
 * {@link LogEntry}. A new entry object is started when an entry is entered, filled in
 * by the timestamp/level/message callbacks, and stored when the entry is exited.
 */
public class LogListener extends LogBaseListener {

    /** Formatter matching timestamps such as {@code 2018-May-05 14:20:24} (English month names). */
    private static final DateTimeFormatter DEFAULT_DATETIME_FORMATTER
        = DateTimeFormatter.ofPattern("yyyy-MMM-dd HH:mm:ss", Locale.ENGLISH);

    /** Completed entries in the order they were exited. */
    private List<LogEntry> entries = new ArrayList<>();

    /** Entry currently being populated; replaced on every {@link #enterEntry}. */
    private LogEntry currentLogEntry;

    /** Starts a fresh {@link LogEntry} for the entry being entered. */
    @Override
    public void enterEntry(LogParser.EntryContext ctx) {
        currentLogEntry = new LogEntry();
    }

    /** Stores the entry that has just been completed. */
    @Override
    public void exitEntry(LogParser.EntryContext ctx) {
        entries.add(currentLogEntry);
    }

    /** Parses the timestamp text and sets it on the current entry. */
    @Override
    public void enterTimestamp(LogParser.TimestampContext ctx) {
        String rawTimestamp = ctx.getText();
        LocalDateTime parsed = LocalDateTime.parse(rawTimestamp, DEFAULT_DATETIME_FORMATTER);
        currentLogEntry.setTimestamp(parsed);
    }

    /** Copies the message text onto the current entry. */
    @Override
    public void enterMessage(LogParser.MessageContext ctx) {
        String text = ctx.getText();
        currentLogEntry.setMessage(text);
    }

    /** Maps the level text onto the {@link LogLevel} constant of the same name. */
    @Override
    public void enterLevel(LogParser.LevelContext ctx) {
        LogLevel parsedLevel = LogLevel.valueOf(ctx.getText());
        currentLogEntry.setLevel(parsedLevel);
    }

    /**
     * @return an unmodifiable view of the entries collected so far
     */
    public List<LogEntry> getEntries() {
        return Collections.unmodifiableList(entries);
    }
}
@@ -0,0 +1,35 @@
package com.baeldung.antlr.log.model;
import java.time.LocalDateTime;
/**
 * Mutable holder for the three parts of a log entry: timestamp, level and message.
 */
public class LogEntry {

    private LocalDateTime timestamp;
    private LogLevel level;
    private String message;

    /** @return the timestamp, or {@code null} if none has been set */
    public LocalDateTime getTimestamp() {
        return this.timestamp;
    }

    /** @param timestamp the timestamp to store */
    public void setTimestamp(LocalDateTime timestamp) {
        this.timestamp = timestamp;
    }

    /** @return the level, or {@code null} if none has been set */
    public LogLevel getLevel() {
        return this.level;
    }

    /** @param level the level to store */
    public void setLevel(LogLevel level) {
        this.level = level;
    }

    /** @return the message, or {@code null} if none has been set */
    public String getMessage() {
        return this.message;
    }

    /** @param message the message to store */
    public void setMessage(String message) {
        this.message = message;
    }
}
@@ -0,0 +1,5 @@
package com.baeldung.antlr.log.model;
/**
 * Levels a log entry can carry. The constant names are what {@code LogLevel.valueOf}
 * is called with, so they must not be renamed.
 */
public enum LogLevel {
    DEBUG,
    INFO,
    ERROR;
}
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT" />
</root>
</configuration>
@@ -0,0 +1,30 @@
package com.baeldung.antlr;
import com.baeldung.antlr.java.UppercaseMethodListener;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import org.junit.Test;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
/**
 * Verifies that {@link UppercaseMethodListener} reports methods whose names start with
 * an upper-case letter.
 */
public class JavaParserUnitTest {

    /** A class with a single method named {@code DoSomething} must yield exactly one error. */
    @Test
    public void whenOneMethodStartsWithUpperCase_thenOneErrorReturned() throws Exception{
        // Lex and parse the sample source into a parse tree.
        Java8Lexer lexer = new Java8Lexer(
            CharStreams.fromString("public class SampleClass { void DoSomething(){} }"));
        Java8Parser parser = new Java8Parser(new CommonTokenStream(lexer));
        ParseTree compilationUnit = parser.compilationUnit();

        // Walk the tree with the listener under test.
        UppercaseMethodListener listener = new UppercaseMethodListener();
        new ParseTreeWalker().walk(listener, compilationUnit);

        assertThat(listener.getErrors().size(), is(1));
        assertThat(listener.getErrors().get(0),
            is("Method DoSomething is uppercased!"));
    }
}
@@ -0,0 +1,36 @@
package com.baeldung.antlr;
import static org.hamcrest.CoreMatchers.is;
import static org.hamcrest.MatcherAssert.assertThat;
import com.baeldung.antlr.log.LogListener;
import com.baeldung.antlr.log.model.LogLevel;
import com.baeldung.antlr.log.model.LogEntry;
import org.antlr.v4.runtime.CharStreams;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.tree.ParseTreeWalker;
import org.junit.Test;
import java.time.LocalDateTime;
public class LogParserUnitTest {
@Test
public void whenLogContainsOneErrorLogEntry_thenOneErrorIsReturned() throws Exception {
String logLines = "2018-May-05 14:20:21 DEBUG entering awesome method\r\n" +
"2018-May-05 14:20:24 ERROR Bad thing happened\r\n";
LogLexer serverLogLexer = new LogLexer(CharStreams.fromString(logLines));
CommonTokenStream tokens = new CommonTokenStream( serverLogLexer );
LogParser logParser = new LogParser(tokens);
ParseTreeWalker walker = new ParseTreeWalker();
LogListener logWalker = new LogListener();
walker.walk(logWalker, logParser.log());
assertThat(logWalker.getEntries().size(), is(2));
LogEntry error = logWalker.getEntries().get(1);
assertThat(error.getLevel(), is(LogLevel.ERROR));
assertThat(error.getMessage(), is("Bad thing happened"));
assertThat(error.getTimestamp(), is(LocalDateTime.of(2018,5,5,14,20,24)));
}
}
@@ -0,0 +1,7 @@
## Apache Tika
This module contains articles about Apache Tika
### Relevant articles:
- [Content Analysis with Apache Tika](https://www.baeldung.com/apache-tika)
@@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the "apache-tika" module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>apache-tika</artifactId>
<!-- NOTE(review): this module declares its own version instead of inheriting the parent's; confirm this is intentional. -->
<version>0.0.1-SNAPSHOT</version>
<name>apache-tika</name>
<parent>
<groupId>com.baeldung</groupId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<dependencies>
<!-- Tika parsers artifact; version comes from ${tika.version} below. -->
<dependency>
<groupId>org.apache.tika</groupId>
<artifactId>tika-parsers</artifactId>
<version>${tika.version}</version>
</dependency>
</dependencies>
<properties>
<tika.version>1.17</tika.version>
</properties>
</project>
@@ -0,0 +1,67 @@
package com.baeldung.tika;
import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.Tika;
import org.apache.tika.detect.DefaultDetector;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
 * Static helpers showing two ways of using Apache Tika for each task: the lower-level
 * detector/parser API and the {@link Tika} facade.
 */
public class TikaAnalysis {

    /**
     * Detects the media type of the stream with a {@link DefaultDetector}.
     *
     * @param stream the document content
     * @return the detected media type rendered as a string
     * @throws IOException if the detector fails to read the stream
     */
    public static String detectDocTypeUsingDetector(InputStream stream) throws IOException {
        MediaType detected = new DefaultDetector().detect(stream, new Metadata());
        return detected.toString();
    }

    /**
     * Detects the media type of the stream with the {@link Tika} facade.
     *
     * @param stream the document content
     * @return the detected media type
     * @throws IOException if the stream cannot be read
     */
    public static String detectDocTypeUsingFacade(InputStream stream) throws IOException {
        return new Tika().detect(stream);
    }

    /**
     * Parses the stream with an {@link AutoDetectParser} into a {@link BodyContentHandler}
     * and returns the handler's string form.
     *
     * @param stream the document content
     * @return the string form of the content handler after parsing
     * @throws IOException   if the stream cannot be read
     * @throws TikaException if parsing fails
     * @throws SAXException  if the content handler reports an error
     */
    public static String extractContentUsingParser(InputStream stream) throws IOException, TikaException, SAXException {
        ContentHandler bodyHandler = new BodyContentHandler();
        Parser autoParser = new AutoDetectParser();
        autoParser.parse(stream, bodyHandler, new Metadata(), new ParseContext());
        return bodyHandler.toString();
    }

    /**
     * Extracts the content of the stream as a string with the {@link Tika} facade.
     *
     * @param stream the document content
     * @return the result of {@code Tika.parseToString}
     * @throws IOException   if the stream cannot be read
     * @throws TikaException if parsing fails
     */
    public static String extractContentUsingFacade(InputStream stream) throws IOException, TikaException {
        return new Tika().parseToString(stream);
    }

    /**
     * Parses the stream with an {@link AutoDetectParser} and returns the metadata object
     * that was handed to the parser.
     *
     * @param stream the document content
     * @return the metadata instance passed to the parser
     * @throws IOException   if the stream cannot be read
     * @throws SAXException  if the content handler reports an error
     * @throws TikaException if parsing fails
     */
    public static Metadata extractMetadatatUsingParser(InputStream stream) throws IOException, SAXException, TikaException {
        Metadata collected = new Metadata();
        Parser autoParser = new AutoDetectParser();
        autoParser.parse(stream, new BodyContentHandler(), collected, new ParseContext());
        return collected;
    }

    /**
     * Parses the stream with the {@link Tika} facade and returns the metadata object
     * that was handed to it.
     *
     * @param stream the document content
     * @return the metadata instance passed to {@code Tika.parse}
     * @throws IOException   if the stream cannot be read
     * @throws TikaException declared for callers; kept from the original signature
     */
    public static Metadata extractMetadatatUsingFacade(InputStream stream) throws IOException, TikaException {
        Metadata collected = new Metadata();
        new Tika().parse(stream, collected);
        return collected;
    }
}
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT" />
</root>
</configuration>
@@ -0,0 +1,79 @@
package com.baeldung.tika;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.junit.Test;
import org.xml.sax.SAXException;
/**
 * Exercises the helpers in {@link TikaAnalysis} against classpath resources.
 *
 * <p>Each resource stream is opened in a try-with-resources block so it is closed even
 * when an assertion fails or the code under test throws.
 */
public class TikaUnitTest {

    @Test
    public void whenUsingDetector_thenDocumentTypeIsReturned() throws IOException {
        try (InputStream stream = openResource("tika.txt")) {
            String mediaType = TikaAnalysis.detectDocTypeUsingDetector(stream);

            assertEquals("application/pdf", mediaType);
        }
    }

    @Test
    public void whenUsingFacade_thenDocumentTypeIsReturned() throws IOException {
        try (InputStream stream = openResource("tika.txt")) {
            String mediaType = TikaAnalysis.detectDocTypeUsingFacade(stream);

            assertEquals("application/pdf", mediaType);
        }
    }

    @Test
    public void whenUsingParser_thenContentIsReturned() throws IOException, TikaException, SAXException {
        try (InputStream stream = openResource("tika.docx")) {
            String content = TikaAnalysis.extractContentUsingParser(stream);

            assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
            assertThat(content, containsString("detects and extracts metadata and text"));
        }
    }

    @Test
    public void whenUsingFacade_thenContentIsReturned() throws IOException, TikaException {
        try (InputStream stream = openResource("tika.docx")) {
            String content = TikaAnalysis.extractContentUsingFacade(stream);

            assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
            assertThat(content, containsString("detects and extracts metadata and text"));
        }
    }

    @Test
    public void whenUsingParser_thenMetadataIsReturned() throws IOException, TikaException, SAXException {
        try (InputStream stream = openResource("tika.xlsx")) {
            Metadata metadata = TikaAnalysis.extractMetadatatUsingParser(stream);

            assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
            assertEquals("Microsoft Office User", metadata.get("Author"));
        }
    }

    @Test
    public void whenUsingFacade_thenMetadataIsReturned() throws IOException, TikaException {
        try (InputStream stream = openResource("tika.xlsx")) {
            Metadata metadata = TikaAnalysis.extractMetadatatUsingFacade(stream);

            assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
            assertEquals("Microsoft Office User", metadata.get("Author"));
        }
    }

    /**
     * Opens a test resource through this class's class loader.
     *
     * @param name resource name relative to the classpath root
     * @return the resource stream, or {@code null} when the resource is not found
     */
    private InputStream openResource(String name) {
        return this.getClass().getClassLoader().getResourceAsStream(name);
    }
}
@@ -0,0 +1,8 @@
## Asciidoctor
This module contains articles about Asciidoctor
### Relevant articles:
- [Generating a Book with Asciidoctor](https://www.baeldung.com/asciidoctor-book)
- [Introduction to Asciidoctor in Java](https://www.baeldung.com/asciidoctor)
@@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the "asciidoctor" module: AsciidoctorJ library usage plus a PDF book built by the Maven plugin. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>asciidoctor</artifactId>
<name>asciidoctor</name>
<!-- groupId and version are inherited from the parent aggregator. -->
<parent>
<groupId>com.baeldung</groupId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<!-- Libraries on the module's own classpath. -->
<dependencies>
<dependency>
<groupId>org.asciidoctor</groupId>
<artifactId>asciidoctorj</artifactId>
<version>${asciidoctorj.version}</version>
</dependency>
<dependency>
<groupId>org.asciidoctor</groupId>
<artifactId>asciidoctorj-pdf</artifactId>
<version>${asciidoctorj-pdf.version}</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.asciidoctor</groupId>
<artifactId>asciidoctor-maven-plugin</artifactId>
<version>${asciidoctor-maven-plugin.version}</version>
<!-- PDF converter on the plugin's classpath; versioned separately from the module dependency above. -->
<dependencies>
<dependency>
<groupId>org.asciidoctor</groupId>
<artifactId>asciidoctorj-pdf</artifactId>
<version>${asciidoctorj-pdf.plugin.version}</version>
</dependency>
</dependencies>
<executions>
<!-- Runs process-asciidoc during generate-resources. -->
<execution>
<id>output-pdf</id>
<phase>generate-resources</phase>
<goals>
<goal>process-asciidoc</goal>
</goals>
</execution>
</executions>
<configuration>
<sourceDirectory>src/docs/asciidoc</sourceDirectory>
<outputDirectory>target/docs/asciidoc</outputDirectory>
<!-- Points the PDF backend at the theme directory and selects the "custom" style. -->
<attributes>
<pdf-stylesdir>${project.basedir}/src/themes</pdf-stylesdir>
<pdf-style>custom</pdf-style>
</attributes>
<backend>pdf</backend>
<doctype>book</doctype>
</configuration>
</plugin>
</plugins>
</build>
<properties>
<asciidoctor-maven-plugin.version>2.2.2</asciidoctor-maven-plugin.version>
<asciidoctorj.version>2.5.7</asciidoctorj.version>
<!-- Two properties with the same value: one for the module dependency, one for the plugin dependency. -->
<asciidoctorj-pdf.version>2.3.4</asciidoctorj-pdf.version>
<asciidoctorj-pdf.plugin.version>2.3.4</asciidoctorj-pdf.plugin.version>
</properties>
</project>
@@ -0,0 +1,13 @@
:icons: font
= Generating book with AsciiDoctorj
Baeldung
[abstract]
This is the actual content.
== First Section
This is the first section of the book where you can include some nice icons like icon:comment[].
You can also create http://www.baeldung.com[links]
@@ -0,0 +1,33 @@
package com.baeldung.asciidoctor;
import static org.asciidoctor.Asciidoctor.Factory.create;
import static org.asciidoctor.OptionsBuilder.options;
import java.io.File;
import java.util.HashMap;
import java.util.Map;
import org.asciidoctor.Asciidoctor;
/**
 * Small demo of the AsciidoctorJ API: converting an AsciiDoc file to PDF and an AsciiDoc
 * string to HTML.
 */
public class AsciidoctorDemo {

    private final Asciidoctor asciidoctor;

    /** Creates the demo with an {@link Asciidoctor} instance obtained from the factory. */
    AsciidoctorDemo() {
        asciidoctor = create();
    }

    /**
     * Converts the file {@code sample.adoc} (resolved against the working directory) with
     * the {@code pdf} backend and the {@code inPlace} option enabled.
     *
     * <p>NOTE(review): {@code input} is not used; the method always converts
     * {@code sample.adoc}. Kept as-is because switching to string conversion would change
     * where the output is written. Confirm the intended behaviour.
     *
     * @param input currently ignored
     */
    public void generatePDFFromString(final String input) {
        final Map<String, Object> options = options().inPlace(true)
            .backend("pdf")
            .asMap();

        // The returned value was never used, so it is no longer stored.
        asciidoctor.convertFile(new File("sample.adoc"), options);
    }

    /**
     * Converts the given AsciiDoc markup with default options.
     *
     * @param input AsciiDoc source to convert
     * @return the conversion result produced by {@code Asciidoctor.convert}
     */
    String generateHTMLFromString(final String input) {
        // Convert the caller's input; previously a hard-coded literal was converted instead.
        return asciidoctor.convert(input, new HashMap<String, Object>());
    }
}
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT" />
</root>
</configuration>
@@ -0,0 +1,13 @@
package com.baeldung.asciidoctor;
import org.junit.Assert;
import org.junit.Test;
/**
 * Integration test for the string-to-HTML conversion of {@link AsciidoctorDemo}.
 */
public class AsciidoctorDemoIntegrationTest {

    /** Converting a short AsciiDoc paragraph must yield the expected HTML fragment. */
    @Test
    public void givenString_whenConverting_thenResultingHTMLCode() {
        final AsciidoctorDemo asciidoctorDemo = new AsciidoctorDemo();

        final String expectedHtml = "<div class=\"paragraph\">\n<p>Hello <em>Baeldung</em>!</p>\n</div>";
        final String actualHtml = asciidoctorDemo.generateHTMLFromString("Hello _Baeldung_!");

        // JUnit expects (expected, actual); the swapped order produced misleading failure messages.
        Assert.assertEquals(expectedHtml, actualHtml);
    }
}
@@ -0,0 +1,29 @@
title_page:
align: left
page:
layout: portrait
margin: [0.75in, 1in, 0.75in, 1in]
size: A4
base:
font_color: #333333
line_height_length: 17
line_height: $base_line_height_length / $base_font_size
link:
font_color: #009900
header:
height: 0.5in
line_height: 1
recto_content:
center: '{document-title}'
verso_content:
center: '{document-title}'
footer:
height: 0.5in
line_height: 1
recto_content:
right: '{chapter-title} | *{page-number}*'
verso_content:
left: '*{page-number}* | {chapter-title}'
@@ -0,0 +1,3 @@
## Relevant articles
- [Editing Existing PDF Files in Java](https://www.baeldung.com/java-edit-existing-pdf)
- [Get Information About a PDF in Java](https://www.baeldung.com/java-pdf-info)
@@ -0,0 +1,75 @@
<?xml version="1.0"?>
<!-- Maven build for the "pdf-2" module. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>pdf-2</artifactId>
<name>pdf-2</name>
<url>http://maven.apache.org</url>
<!-- groupId and version are inherited from the parent aggregator. -->
<parent>
<groupId>com.baeldung</groupId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<dependencies>
<!-- iText 7 core, pulled in as a POM-type dependency. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext7-core</artifactId>
<version>${itextpdf.core.version}</version>
<type>pom</type>
</dependency>
<!-- iText cleanup add-on. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>cleanup</artifactId>
<version>${itextpdf.cleanup.version}</version>
</dependency>
<!-- iText 5.x artifact, versioned separately from iText 7 above. -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>${itextpdf.version}</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>${pdfbox.version}</version>
</dependency>
<!-- Apache POI OOXML support. -->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi-ooxml.version}</version>
</dependency>
<!-- Log4j 2 API and implementation. -->
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>${log4j-api.version}</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-core</artifactId>
<version>${log4j-core.version}</version>
</dependency>
</dependencies>
<build>
<finalName>pdf-2</finalName>
<resources>
<!-- NOTE(review): filtering is enabled for all of src/main/resources; Maven's resources plugin documentation advises against filtering binary files, so confirm this is safe for the files in that directory. -->
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
</build>
<properties>
<itextpdf.version>5.5.13.3</itextpdf.version>
<itextpdf.core.version>7.2.3</itextpdf.core.version>
<itextpdf.cleanup.version>3.0.1</itextpdf.cleanup.version>
<pdfbox.version>3.0.0</pdfbox.version>
<poi-ooxml.version>5.2.5</poi-ooxml.version>
<log4j-api.version>2.20.0</log4j-api.version>
<log4j-core.version>2.20.0</log4j-core.version>
</properties>
</project>
@@ -0,0 +1,242 @@
package com.baeldung.exceltopdf;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.Iterator;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.HorizontalAlignment;
import org.apache.poi.ss.usermodel.IndexedColors;
import org.apache.poi.ss.usermodel.VerticalAlignment;
import org.apache.poi.xssf.usermodel.XSSFColor;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.poi.ss.usermodel.CellType;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Cell;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.FontFactory;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
public class ExcelToPDFConverter {
private static final Logger logger = LogManager.getLogger(ExcelToPDFConverter.class);
public static XSSFWorkbook readExcelFile(String excelFilePath) throws IOException {
FileInputStream inputStream = new FileInputStream(excelFilePath);
XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
inputStream.close();
return workbook;
}
private static Document createPDFDocument(String pdfFilePath) throws IOException, DocumentException {
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream(pdfFilePath));
document.open();
return document;
}
public static void convertExcelToPDF(String excelFilePath, String pdfFilePath) throws IOException, DocumentException {
XSSFWorkbook workbook = readExcelFile(excelFilePath);
Document document = createPDFDocument(pdfFilePath);
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
XSSFSheet worksheet = workbook.getSheetAt(i);
// Add header with sheet name as title
Paragraph title = new Paragraph(worksheet.getSheetName(), new Font(Font.FontFamily.HELVETICA, 18, Font.BOLD));
title.setSpacingAfter(20f);
title.setAlignment(Element.ALIGN_CENTER);
document.add(title);
createAndAddTable(worksheet, document);
// Add a new page for each sheet (except the last one)
if (i < workbook.getNumberOfSheets() - 1) {
document.newPage();
}
}
document.close();
workbook.close();
}
private static void createAndAddTable(XSSFSheet worksheet, Document document) throws DocumentException, IOException {
PdfPTable table = new PdfPTable(worksheet.getRow(0)
.getPhysicalNumberOfCells());
table.setWidthPercentage(100);
addTableHeader(worksheet, table);
addTableData(worksheet, table);
document.add(table);
}
private static void addTableHeader(XSSFSheet worksheet, PdfPTable table) throws DocumentException, IOException {
Row headerRow = worksheet.getRow(0);
for (int i = 0; i < headerRow.getPhysicalNumberOfCells(); i++) {
Cell cell = headerRow.getCell(i);
String headerText = getCellText(cell);
PdfPCell headerCell = new PdfPCell(new Phrase(headerText, getCellStyle(cell)));
setBackgroundColor(cell, headerCell);
setCellAlignment(cell, headerCell);
table.addCell(headerCell);
}
}
public static String getCellText(Cell cell) {
String cellValue;
switch (cell.getCellType()) {
case STRING:
cellValue = cell.getStringCellValue();
break;
case NUMERIC:
cellValue = String.valueOf(BigDecimal.valueOf(cell.getNumericCellValue()));
break;
case BLANK:
default:
cellValue = "";
break;
}
return cellValue;
}
private static void addTableData(XSSFSheet worksheet, PdfPTable table) throws DocumentException, IOException {
Iterator<Row> rowIterator = worksheet.iterator();
while (rowIterator.hasNext()) {
Row row = rowIterator.next();
if (row.getRowNum() == 0) {
continue;
}
for (int i = 0; i < row.getPhysicalNumberOfCells(); i++) {
Cell cell = row.getCell(i);
String cellValue = getCellText(cell);
PdfPCell cellPdf = new PdfPCell(new Phrase(cellValue, getCellStyle(cell)));
setBackgroundColor(cell, cellPdf);
setCellAlignment(cell, cellPdf);
table.addCell(cellPdf);
}
}
}
private static void setBackgroundColor(Cell cell, PdfPCell cellPdf) {
// Set background color
short bgColorIndex = cell.getCellStyle()
.getFillForegroundColor();
if (bgColorIndex != IndexedColors.AUTOMATIC.getIndex()) {
XSSFColor bgColor = (XSSFColor) cell.getCellStyle()
.getFillForegroundColorColor();
if (bgColor != null) {
byte[] rgb = bgColor.getRGB();
if (rgb != null && rgb.length == 3) {
cellPdf.setBackgroundColor(new BaseColor(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF));
}
}
}
}
private static void setCellAlignment(Cell cell, PdfPCell cellPdf) {
CellStyle cellStyle = cell.getCellStyle();
HorizontalAlignment horizontalAlignment = cellStyle.getAlignment();
VerticalAlignment verticalAlignment = cellStyle.getVerticalAlignment();
switch (horizontalAlignment) {
case LEFT:
cellPdf.setHorizontalAlignment(Element.ALIGN_LEFT);
break;
case CENTER:
cellPdf.setHorizontalAlignment(Element.ALIGN_CENTER);
break;
case JUSTIFY:
case FILL:
cellPdf.setVerticalAlignment(Element.ALIGN_JUSTIFIED);
break;
case RIGHT:
cellPdf.setHorizontalAlignment(Element.ALIGN_RIGHT);
break;
}
switch (verticalAlignment) {
case TOP:
cellPdf.setVerticalAlignment(Element.ALIGN_TOP);
break;
case CENTER:
cellPdf.setVerticalAlignment(Element.ALIGN_MIDDLE);
break;
case JUSTIFY:
cellPdf.setVerticalAlignment(Element.ALIGN_JUSTIFIED);
break;
case BOTTOM:
cellPdf.setVerticalAlignment(Element.ALIGN_BOTTOM);
break;
}
}
private static Font getCellStyle(Cell cell) throws DocumentException, IOException {
Font font = new Font();
CellStyle cellStyle = cell.getCellStyle();
org.apache.poi.ss.usermodel.Font cellFont = cell.getSheet()
.getWorkbook()
.getFontAt(cellStyle.getFontIndexAsInt());
short fontColorIndex = cellFont.getColor();
if (fontColorIndex != IndexedColors.AUTOMATIC.getIndex() && cellFont instanceof XSSFFont) {
XSSFColor fontColor = ((XSSFFont) cellFont).getXSSFColor();
if (fontColor != null) {
byte[] rgb = fontColor.getRGB();
if (rgb != null && rgb.length == 3) {
font.setColor(new BaseColor(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF));
}
}
}
if (cellFont.getItalic()) {
font.setStyle(Font.ITALIC);
}
if (cellFont.getStrikeout()) {
font.setStyle(Font.STRIKETHRU);
}
if (cellFont.getUnderline() == 1) {
font.setStyle(Font.UNDERLINE);
}
short fontSize = cellFont.getFontHeightInPoints();
font.setSize(fontSize);
if (cellFont.getBold()) {
font.setStyle(Font.BOLD);
}
String fontName = cellFont.getFontName();
if (FontFactory.isRegistered(fontName)) {
font.setFamily(fontName); // Use extracted font family if supported by iText
} else {
logger.warn("Unsupported font type: {}", fontName);
// - Use a fallback font (e.g., Helvetica)
font.setFamily("Helvetica");
}
return font;
}
public static void main(String[] args) throws DocumentException, IOException {
String excelFilePath = "src/main/resources/excelsample.xlsx";
String pdfFilePath = "src/main/resources/pdfsample.pdf";
convertExcelToPDF(excelFilePath, pdfFilePath);
}
}
@@ -0,0 +1,43 @@
package com.baeldung.pdfedition;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfcleanup.CleanUpProperties;
import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
import com.itextpdf.pdfcleanup.PdfCleanUpTool;
import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
/**
 * Removes content from an existing PDF: first every match of a regular expression,
 * then two fixed rectangular areas, writing the result to a new file.
 */
public class PdfContentRemover {

    private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
    private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf";

    /**
     * Opens {@link #SOURCE}, removes content and writes the result to {@link #DESTINATION}.
     *
     * @throws IOException if the source cannot be read, the destination cannot be written,
     *                     or the cleanup fails
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources: the document is closed even when the cleanup throws.
        try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(SOURCE), new PdfWriter(DESTINATION))) {
            removeContentFromDocument(pdfDocument);
        }
    }

    /**
     * Applies both cleanup steps to the document.
     *
     * @param pdfDocument the open document to clean
     * @throws IOException if a cleanup step fails
     */
    private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException {
        // 5.1. remove text
        CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
        strategy.add(new RegexBasedCleanupStrategy("Baeldung"));
        PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);

        // 5.2. remove other areas
        List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(
            new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)),
            new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35)));
        PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties());
        cleaner.cleanUp();
    }
}
@@ -0,0 +1,86 @@
package com.baeldung.pdfedition;
import java.io.IOException;
import java.net.MalformedURLException;
import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.fields.PdfFormField;
import com.itextpdf.forms.fields.PdfTextFormField;
import com.itextpdf.io.image.ImageData;
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.element.Table;
import com.itextpdf.layout.element.Text;
import com.itextpdf.layout.properties.UnitValue;
/**
 * Adds several kinds of content to an existing PDF (a form field, a new page, an
 * annotation, an image, a paragraph and a table) and writes the result to a new file.
 */
public class PdfEditor {

    private static final String SOURCE = "src/main/resources/baeldung.pdf";
    private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf";

    /**
     * Opens {@link #SOURCE}, adds the content and writes the result to {@link #DESTINATION}.
     *
     * @throws IOException if the source cannot be read or the destination cannot be written
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources: the document is closed even when adding content throws.
        // On the success path it has already been closed through the layout Document.
        try (PdfDocument pdfDocument = new PdfDocument(new PdfReader(SOURCE), new PdfWriter(DESTINATION))) {
            addContentToDocument(pdfDocument);
        }
    }

    /**
     * Adds all demo content to the document and closes it.
     *
     * @param pdfDocument the open document to modify; closed when this method returns
     * @throws MalformedURLException if the image location cannot be resolved
     */
    private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException {
        // 4.1. add form
        PdfFormField personal = PdfFormField.createEmptyField(pdfDocument);
        personal.setFieldName("information");
        PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", "");
        personal.addKid(name);
        PdfAcroForm.getAcroForm(pdfDocument, true)
            .addField(personal, pdfDocument.getFirstPage());

        // 4.2. add new page
        pdfDocument.addNewPage(1);

        // 4.3. add annotation
        PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name"))
            .setContents("Your name");
        pdfDocument.getPage(2)
            .addAnnotation(ann);

        // create document from pdf document
        // closing it also closes the pdfDocument, which then closes the pdfReader and pdfWriter
        try (Document document = new Document(pdfDocument)) {
            // 4.4. add an image
            ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png");
            Image image = new Image(imageData).scaleAbsolute(550, 100)
                .setFixedPosition(1, 10, 50);
            document.add(image);

            // 4.5. add a paragraph
            Text title = new Text("This is a demo").setFontSize(16);
            Text author = new Text("Baeldung tutorials.");
            Paragraph p = new Paragraph().setFontSize(8)
                .add(title)
                .add(" from ")
                .add(author);
            document.add(p);

            // 4.6. add a table
            Table table = new Table(UnitValue.createPercentArray(2));
            table.addHeaderCell("#");
            table.addHeaderCell("company");
            table.addCell("name");
            table.addCell("baeldung");
            document.add(table);
        }
    }
}
@@ -0,0 +1,45 @@
package com.baeldung.pdfedition;
import java.io.IOException;
import com.itextpdf.kernel.colors.ColorConstants;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
/**
 * Example that removes every match of a regular expression from {@link #SOURCE} and writes
 * the text {@code HIDDEN} over each cleaned-up location, saving the result to {@link #DESTINATION}.
 */
public class PdfTextReplacement {

    private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
    private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf";

    /**
     * Opens the source PDF, performs the replacement and saves the result.
     *
     * @param args unused
     * @throws IOException if the source cannot be read or the destination cannot be written
     */
    public static void main(String[] args) throws IOException {
        // try-with-resources closes the document (and its reader/writer) even if the
        // replacement throws, instead of only on the happy path.
        try (PdfReader reader = new PdfReader(SOURCE);
             PdfWriter writer = new PdfWriter(DESTINATION);
             PdfDocument pdfDocument = new PdfDocument(reader, writer)) {
            replaceTextContentFromDocument(pdfDocument);
        }
    }

    /**
     * Sweeps the document for the text "Baeldung tutorials" using a white redaction color,
     * then draws a replacement paragraph inside the rectangle of every location found.
     *
     * @param pdfDocument the document to clean up and overwrite
     * @throws IOException if the clean-up fails
     */
    private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException {
        CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
        strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE));
        PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);

        for (IPdfTextLocation location : strategy.getResultantLocations()) {
            // NOTE(review): the +1 suggests getPageNumber() is zero-based - confirm against the iText docs
            PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1);
            PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument());
            Canvas canvas = new Canvas(pdfCanvas, location.getRectangle());
            canvas.add(new Paragraph("HIDDEN").setFontSize(8)
                .setMarginTop(0f));
        }
    }
}
@@ -0,0 +1,31 @@
package com.baeldung.pdfinfo;
import com.itextpdf.text.pdf.PdfReader;
import java.io.IOException;
import java.util.Map;
/**
 * Reads basic information (page count, encryption flag, info dictionary) from a PDF file
 * using the iText {@link PdfReader}.
 */
public class PdfInfoIText {

    /**
     * Returns the number of pages of the given PDF.
     *
     * @param pdfFile location of the PDF, passed straight to {@link PdfReader}
     * @return the page count reported by the reader
     * @throws IOException if the file cannot be opened or parsed
     */
    public static int getNumberOfPages(final String pdfFile) throws IOException {
        PdfReader reader = new PdfReader(pdfFile);
        try {
            return reader.getNumberOfPages();
        } finally {
            // release the file even when the query above throws
            reader.close();
        }
    }

    /**
     * Tells whether the given PDF is encrypted.
     *
     * @param pdfFile location of the PDF, passed straight to {@link PdfReader}
     * @return the value of {@code PdfReader.isEncrypted()}
     * @throws IOException if the file cannot be opened or parsed
     */
    public static boolean isPasswordRequired(final String pdfFile) throws IOException {
        PdfReader reader = new PdfReader(pdfFile);
        try {
            return reader.isEncrypted();
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the info entries of the given PDF.
     *
     * @param pdfFile location of the PDF, passed straight to {@link PdfReader}
     * @return the map returned by {@code PdfReader.getInfo()}
     * @throws IOException if the file cannot be opened or parsed
     */
    public static Map<String, String> getInfo(final String pdfFile) throws IOException {
        PdfReader reader = new PdfReader(pdfFile);
        try {
            return reader.getInfo();
        } finally {
            reader.close();
        }
    }
}
@@ -0,0 +1,36 @@
package com.baeldung.pdfinfo;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import java.io.File;
import java.io.IOException;
/**
 * Reads basic information (page count, encryption flag, document information) from a PDF file
 * using Apache PDFBox.
 */
public class PdfInfoPdfBox {

    /**
     * Returns the number of pages of the given PDF.
     *
     * @param pdfFile path of the PDF file
     * @return the page count reported by the loaded document
     * @throws IOException if the file cannot be loaded
     */
    public static int getNumberOfPages(final String pdfFile) throws IOException {
        // try-with-resources closes the document even when the query throws
        try (PDDocument document = Loader.loadPDF(new File(pdfFile))) {
            return document.getNumberOfPages();
        }
    }

    /**
     * Tells whether the given PDF is encrypted.
     *
     * @param pdfFile path of the PDF file
     * @return the value of {@code PDDocument.isEncrypted()}
     * @throws IOException if the file cannot be loaded
     */
    public static boolean isPasswordRequired(final String pdfFile) throws IOException {
        try (PDDocument document = Loader.loadPDF(new File(pdfFile))) {
            return document.isEncrypted();
        }
    }

    /**
     * Returns the document information of the given PDF.
     *
     * @param pdfFile path of the PDF file
     * @return the information object obtained before the document is closed
     * @throws IOException if the file cannot be loaded
     */
    public static PDDocumentInformation getInfo(final String pdfFile) throws IOException {
        try (PDDocument document = Loader.loadPDF(new File(pdfFile))) {
            return document.getDocumentInformation();
        }
    }
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

@@ -0,0 +1,43 @@
%PDF-1.4
%âãÏÓ
3 0 obj
<</Length 751/Filter/FlateDecode>>stream
xœ•WËRÛ0Ýû+î’.Puõ´–@Ÿ”R žÎtéi ¤Íc0d:ü}%K&a¢«daœãsïÑ‘tdªÓ¦’jn ™VXæ, îÞšÛêèG?íz¸˜=>ÁU{×¾kþTÇéqÚyÞðç høe¸çpŒj¨ÿ±©®«‡Š3'œBàL9.ü@ý]b}säŠú®º-<ŽŠ³ÃBi&bHåX}CiÅT††.àÜûñÙ³5üË
w’ üÐF4?ŒÍKѼ¼É–$îägW&Iš$hDq
'GÈìJ¢Ð(‰B£$
H®ojGȯú]Iš$hD IÅõMµ-¹D I&Iš$Q\ßÔï$M4I"Ð$‰âú¦øb`fyShD I&I;èuH_Ÿ þ‹`-ãaK;h1zUˆ^3\×5„+:órõ9st¹b!i9Ü…dݪ…F±ÚV,Fü×ùŠ"T”`•|{ųõãÓjÑõcÅ—Z¶fJêŠl]^¬5“õa#lVOí<_N9ôq~X¹«v6¥ÄEŒ• *
Xà!ãĬ)òu‹©^3Oû3¤lÈ_ºïÉrú íšp@8Æu‰¯
«ó\ͳ%ê/Ò'ã4E
Ìè“(ùT`ŸwËå3|Y}*ðýêv´OÒ–¸—´Q–$É=FÌ 9£dÑ(ºïÅzö?)›ÛV›mÀ÷úD·.ø¤ER{|¢™Ñ'Uô‰fë–¼*/(í^;åQf⛥DéýaOH¦K{° ¢°ÇPÏìùW–ÅßoÂÛk=˜ƒmÒÁ¢BçÕªáfÎ=ÿŠ‹þ‘]“é^ÑdbxÉdšý½]®»9Lœ¶7gNÞkºŒÙrjÁ`$²yƒÁ>W7û›²Át¯h°)þó˳ÏîÛ~>ëà†íœç‘o“ß÷};Ý7´\©‘Li*hêöTüˆ
endstream
endobj
5 0 obj
<</Type/Page/MediaBox[0 0 595 842]/Resources<</Font<</F1 1 0 R/F2 2 0 R>>>>/Contents 3 0 R/Parent 4 0 R>>
endobj
1 0 obj
<</Type/Font/Subtype/Type1/BaseFont/Helvetica-Bold/Encoding/WinAnsiEncoding>>
endobj
2 0 obj
<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
endobj
4 0 obj
<</Type/Pages/Count 1/Kids[5 0 R]>>
endobj
6 0 obj
<</Type/Catalog/Pages 4 0 R>>
endobj
7 0 obj
<</Producer(iText® 5.5.13.3 ©2000-2022 iText Group NV \(AGPL-version\))/CreationDate(D:20231213174247+08'00')/ModDate(D:20231213174247+08'00')>>
endobj
xref
0 8
0000000000 65535 f
0000000954 00000 n
0000001047 00000 n
0000000015 00000 n
0000001135 00000 n
0000000833 00000 n
0000001186 00000 n
0000001231 00000 n
trailer
<</Size 8/Root 6 0 R/Info 7 0 R/ID [<6a28b1036b62f3808f3bfb62a88a5239><6a28b1036b62f3808f3bfb62a88a5239>]>>
@@ -0,0 +1,31 @@
package com.baeldung.pdfinfo;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import java.io.IOException;
import java.util.Map;
import org.junit.jupiter.api.Test;
/** Checks the values {@link PdfInfoIText} reports for the sample PDF in the test resources. */
class PdfInfoITextUnitTest {

    private static final String PDF_FILE = "src/test/resources/input.pdf";

    @Test
    void givenPdf_whenGetNumberOfPages_thenOK() throws IOException {
        int pageCount = PdfInfoIText.getNumberOfPages(PDF_FILE);

        assertEquals(4, pageCount);
    }

    @Test
    void givenPdf_whenIsPasswordRequired_thenOK() throws IOException {
        boolean passwordRequired = PdfInfoIText.isPasswordRequired(PDF_FILE);

        assertFalse(passwordRequired);
    }

    @Test
    void givenPdf_whenGetInfo_thenOK() throws IOException {
        Map<String, String> metadata = PdfInfoIText.getInfo(PDF_FILE);
        String producer = metadata.get("Producer");
        String creator = metadata.get("Creator");

        assertEquals("LibreOffice 4.2", producer);
        assertEquals("Writer", creator);
    }
}
@@ -0,0 +1,31 @@
package com.baeldung.pdfinfo;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.junit.jupiter.api.Test;
import java.io.IOException;
/** Checks the values {@link PdfInfoPdfBox} reports for the sample PDF in the test resources. */
class PdfInfoPdfBoxUnitTest {

    private static final String PDF_FILE = "src/test/resources/input.pdf";

    @Test
    void givenPdf_whenGetNumberOfPages_thenOK() throws IOException {
        int pageCount = PdfInfoPdfBox.getNumberOfPages(PDF_FILE);

        assertEquals(4, pageCount);
    }

    @Test
    void givenPdf_whenIsPasswordRequired_thenOK() throws IOException {
        boolean passwordRequired = PdfInfoPdfBox.isPasswordRequired(PDF_FILE);

        assertFalse(passwordRequired);
    }

    @Test
    void givenPdf_whenGetInfo_thenOK() throws IOException {
        PDDocumentInformation metadata = PdfInfoPdfBox.getInfo(PDF_FILE);
        String producer = metadata.getProducer();
        String creator = metadata.getCreator();

        assertEquals("LibreOffice 4.2", producer);
        assertEquals("Writer", creator);
    }
}
@@ -0,0 +1 @@
/target/
@@ -0,0 +1,11 @@
## PDF
This module contains articles about PDF files.
### Relevant Articles:
- [PDF Conversions in Java](https://www.baeldung.com/pdf-conversions-java)
- [Creating PDF Files in Java](https://www.baeldung.com/java-pdf-creation)
- [Generating PDF Files Using Thymeleaf](https://www.baeldung.com/thymeleaf-generate-pdf)
- [Java Convert PDF to Base64](https://www.baeldung.com/java-convert-pdf-to-base64)
- [HTML to PDF Using OpenPDF](https://www.baeldung.com/java-html-to-pdf)
- [Reading PDF File Using Java](https://www.baeldung.com/java-pdf-file-read)
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,122 @@
<?xml version="1.0"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>pdf</artifactId>
<name>pdf</name>
<url>http://maven.apache.org</url>
<parent>
<groupId>com.baeldung</groupId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<dependencies>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox-tools</artifactId>
<version>${pdfbox-tools.version}</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>net.sf.cssbox</groupId>
<artifactId>pdf2dom</artifactId>
<version>${pdf2dom.version}</version>
<exclusions>
<exclusion>
<artifactId>commons-logging</artifactId>
<groupId>commons-logging</groupId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>${itextpdf.version}</version>
</dependency>
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>${xmlworker.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-scratchpad</artifactId>
<version>${poi-scratchpad.version}</version>
</dependency>
<dependency>
<groupId>org.apache.xmlgraphics</groupId>
<artifactId>batik-transcoder</artifactId>
<version>${batik-transcoder.version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>${poi-ooxml.version}</version>
</dependency>
<dependency>
<groupId>org.thymeleaf</groupId>
<artifactId>thymeleaf</artifactId>
<version>${thymeleaf.version}</version>
</dependency>
<dependency>
<groupId>org.xhtmlrenderer</groupId>
<artifactId>flying-saucer-pdf</artifactId>
<version>${flying-saucer-pdf.version}</version>
</dependency>
<dependency>
<groupId>org.xhtmlrenderer</groupId>
<artifactId>flying-saucer-pdf-openpdf</artifactId>
<version>${flying-saucer-pdf-openpdf.version}</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<dependency>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-core</artifactId>
<version>${open-html-pdf-core.version}</version>
</dependency>
<dependency>
<groupId>com.openhtmltopdf</groupId>
<artifactId>openhtmltopdf-pdfbox</artifactId>
<version>${open-html-pdfbox.version}</version>
</dependency>
</dependencies>
<build>
<finalName>pdf</finalName>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
</build>
<properties>
<pdfbox-tools.version>3.0.0</pdfbox-tools.version>
<pdf2dom.version>2.0.1</pdf2dom.version>
<itextpdf.version>5.5.13.3</itextpdf.version>
<xmlworker.version>5.5.10</xmlworker.version>
<poi-scratchpad.version>3.15</poi-scratchpad.version>
<batik-transcoder.version>1.8</batik-transcoder.version>
<poi-ooxml.version>3.15</poi-ooxml.version>
<thymeleaf.version>3.1.2.RELEASE</thymeleaf.version>
<flying-saucer-pdf.version>9.3.1</flying-saucer-pdf.version>
<open-html-pdfbox.version>1.0.6</open-html-pdfbox.version>
<open-html-pdf-core.version>1.0.10</open-html-pdf-core.version>
<flying-saucer-pdf-openpdf.version>9.2.1</flying-saucer-pdf-openpdf.version>
<jsoup.version>1.16.2</jsoup.version>
</properties>
</project>
Binary file not shown.
@@ -0,0 +1,35 @@
package com.baeldung.pdf;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfWriter;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
/**
 * Example that copies the text of every paragraph of {@code input.docx} into a new
 * {@code output.pdf}, one PDF paragraph per Word paragraph.
 */
public class DocxToPDFExample {

    /**
     * Runs the conversion.
     *
     * @param args unused
     * @throws IOException if either file cannot be read or written
     * @throws DocumentException if iText rejects the PDF content
     */
    public static void main(String[] args) throws IOException, DocumentException {
        // the input stream is part of the resource list so it is closed even when
        // XWPFDocument construction fails
        try (InputStream docxInputStream = new FileInputStream("input.docx");
             XWPFDocument document = new XWPFDocument(docxInputStream);
             OutputStream pdfOutputStream = new FileOutputStream("output.pdf")) {
            Document pdfDocument = new Document();
            PdfWriter.getInstance(pdfDocument, pdfOutputStream);
            pdfDocument.open();

            List<XWPFParagraph> paragraphs = document.getParagraphs();
            for (XWPFParagraph paragraph : paragraphs) {
                pdfDocument.add(new Paragraph(paragraph.getText()));
            }
            pdfDocument.close();
        }
    }
}
@@ -0,0 +1,52 @@
package com.baeldung.pdf;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.fit.pdfdom.PDFDomTree;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.tool.xml.XMLWorkerHelper;
/**
 * Example that converts {@link #PDF} to HTML with PDFDomTree and {@link #HTML} to PDF with
 * iText's XMLWorker. Results are written below {@code src/output}.
 */
public class PDF2HTMLExample {

    private static final String PDF = "src/main/resources/pdf.pdf";
    private static final String HTML = "src/main/resources/html.html";

    /**
     * Runs both conversions; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            generateHTMLFromPDF(PDF);
            generatePDFFromHTML(HTML);
        } catch (IOException | ParserConfigurationException | DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes the given PDF as HTML to {@code src/output/pdf.html}.
     *
     * @param filename path of the PDF to convert
     * @throws ParserConfigurationException declared for the DOM-based parser
     * @throws IOException if the PDF cannot be loaded or the HTML cannot be written
     */
    private static void generateHTMLFromPDF(String filename) throws ParserConfigurationException, IOException {
        // nested try-with-resources: the writer is closed first, then the PDF, on every path
        try (PDDocument pdf = PDDocument.load(new File(filename))) {
            PDFDomTree parser = new PDFDomTree();
            try (Writer output = new PrintWriter("src/output/pdf.html", "utf-8")) {
                parser.writeText(pdf, output);
            }
        }
    }

    /**
     * Renders the given XHTML file to {@code src/output/html.pdf}.
     *
     * @param filename path of the XHTML file to convert
     * @throws ParserConfigurationException declared for signature compatibility
     * @throws IOException if either file cannot be opened or the markup cannot be read
     * @throws DocumentException if iText cannot create the PDF
     */
    private static void generatePDFFromHTML(String filename) throws ParserConfigurationException, IOException, DocumentException {
        // both streams used to be created inline and were never closed explicitly
        try (FileOutputStream pdfOut = new FileOutputStream("src/output/html.pdf");
             FileInputStream htmlIn = new FileInputStream(filename)) {
            Document document = new Document();
            PdfWriter writer = PdfWriter.getInstance(document, pdfOut);
            document.open();
            XMLWorkerHelper.getInstance().parseXHtml(writer, document, htmlIn);
            document.close();
        }
    }
}
@@ -0,0 +1,62 @@
package com.baeldung.pdf;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URL;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfWriter;
/**
 * Example that renders every page of {@link #PDF} to image files and builds PDFs from two
 * remote images. Results are written below {@code src/output}.
 */
public class PDF2ImageExample {

    private static final String PDF = "src/main/resources/pdf.pdf";
    private static final String JPG = "http://cdn2.baeldung.netdna-cdn.com/wp-content/uploads/2016/05/baeldung-rest-widget-main-1.2.0";
    private static final String GIF = "https://media.giphy.com/media/l3V0x6kdXUW9M4ONq/giphy";

    /**
     * Runs all conversions; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            generateImageFromPDF(PDF, "png");
            generateImageFromPDF(PDF, "jpeg");
            generateImageFromPDF(PDF, "gif");
            generatePDFFromImage(JPG, "jpg");
            generatePDFFromImage(GIF, "gif");
        } catch (IOException | DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Renders each page of the PDF at 300 DPI and writes it to
     * {@code src/output/pdf-<page>.<extension>} (pages numbered from 1).
     *
     * @param filename path of the PDF to render
     * @param extension image file extension, also used as the target file suffix
     * @throws IOException if the PDF cannot be loaded or an image cannot be written
     */
    private static void generateImageFromPDF(String filename, String extension) throws IOException {
        // try-with-resources closes the document even when rendering a page fails
        try (PDDocument document = PDDocument.load(new File(filename))) {
            PDFRenderer pdfRenderer = new PDFRenderer(document);
            for (int page = 0; page < document.getNumberOfPages(); ++page) {
                BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
                ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
            }
        }
    }

    /**
     * Downloads the image at {@code filename + "." + extension} and stores it as the only
     * content of {@code src/output/<extension>.pdf}.
     *
     * @param filename image URL without the extension
     * @param extension image extension, appended to the URL and used to name the output file
     * @throws IOException if the image cannot be fetched or the output cannot be written
     * @throws BadElementException if iText cannot handle the image
     * @throws DocumentException if iText cannot create the PDF
     */
    private static void generatePDFFromImage(String filename, String extension)
        throws IOException, BadElementException, DocumentException {
        String input = filename + "." + extension;
        String output = "src/output/" + extension + ".pdf";
        // the output stream is released even when the download or the PDF creation fails
        try (FileOutputStream fos = new FileOutputStream(output)) {
            Document document = new Document();
            PdfWriter writer = PdfWriter.getInstance(document, fos);
            writer.open();
            document.open();
            document.add(Image.getInstance(new URL(input)));
            document.close();
            writer.close();
        }
    }
}
@@ -0,0 +1,84 @@
package com.baeldung.pdf;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.PrintWriter;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.io.RandomAccessFile;
import org.apache.pdfbox.pdfparser.PDFParser;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.Element;
import com.itextpdf.text.Font;
import com.itextpdf.text.PageSize;
import com.itextpdf.text.Paragraph;
import com.itextpdf.text.pdf.PdfWriter;
/**
 * Example that extracts the text of {@link #PDF} into a text file and turns {@link #TXT}
 * into a PDF with one paragraph per input line. Results are written below {@code src/output}.
 */
public class PDF2TextExample {

    private static final String PDF = "src/main/resources/pdf.pdf";
    private static final String TXT = "src/main/resources/txt.txt";

    /**
     * Runs both conversions; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            generateTxtFromPDF(PDF);
            generatePDFFromTxt(TXT);
        } catch (IOException | DocumentException e) {
            e.printStackTrace();
        }
    }

    /**
     * Parses the PDF, strips its text and writes it to {@code src/output/pdf.txt}.
     *
     * @param filename path of the PDF to read
     * @throws IOException if the PDF cannot be parsed or the text file cannot be created
     */
    private static void generateTxtFromPDF(String filename) throws IOException {
        File f = new File(filename);
        String parsedText;
        // every handle is a managed resource, so nothing stays open when parsing or
        // text extraction fails
        try (RandomAccessFile source = new RandomAccessFile(f, "r")) {
            PDFParser parser = new PDFParser(source);
            parser.parse();
            try (COSDocument cosDoc = parser.getDocument();
                 PDDocument pdDoc = new PDDocument(cosDoc)) {
                PDFTextStripper pdfStripper = new PDFTextStripper();
                parsedText = pdfStripper.getText(pdDoc);
            }
        }

        try (PrintWriter pw = new PrintWriter("src/output/pdf.txt")) {
            pw.print(parsedText);
        }
    }

    /**
     * Creates {@code src/output/txt.pdf} (A4, PDF version 1.7) containing a leading blank
     * paragraph followed by one justified paragraph per line of the text file.
     *
     * @param filename path of the text file to read
     * @throws IOException if either file cannot be opened or the text cannot be read
     * @throws DocumentException if iText cannot create the PDF
     */
    private static void generatePDFFromTxt(String filename) throws IOException, DocumentException {
        // both streams are closed on every path, including a failure in the middle of the file
        try (FileOutputStream pdfOut = new FileOutputStream("src/output/txt.pdf");
             BufferedReader br = new BufferedReader(new FileReader(filename))) {
            Document pdfDoc = new Document(PageSize.A4);
            PdfWriter.getInstance(pdfDoc, pdfOut)
                .setPdfVersion(PdfWriter.PDF_VERSION_1_7);
            pdfDoc.open();

            Font myfont = new Font();
            myfont.setStyle(Font.NORMAL);
            myfont.setSize(11);
            pdfDoc.add(new Paragraph("\n"));

            String strLine;
            while ((strLine = br.readLine()) != null) {
                Paragraph para = new Paragraph(strLine + "\n", myfont);
                para.setAlignment(Element.ALIGN_JUSTIFIED);
                pdfDoc.add(para);
            }
            pdfDoc.close();
        }
    }
}
@@ -0,0 +1,50 @@
package com.baeldung.pdf;
import java.io.FileOutputStream;
import java.io.IOException;
import org.apache.poi.xwpf.usermodel.BreakType;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
/**
 * Example that extracts the text of every page of {@link #FILENAME} and stores it in
 * {@code src/output/pdf.docx}, one paragraph per page followed by a page break.
 */
public class PDF2WordExample {

    private static final String FILENAME = "src/main/resources/pdf.pdf";

    /**
     * Runs the conversion; an I/O failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            generateDocFromPDF(FILENAME);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the PDF page by page and writes the extracted text to {@code src/output/pdf.docx}.
     *
     * @param filename location of the PDF to read
     * @throws IOException if the PDF cannot be read or the Word file cannot be written
     */
    private static void generateDocFromPDF(String filename) throws IOException {
        try (XWPFDocument doc = new XWPFDocument()) {
            PdfReader reader = new PdfReader(filename);
            try {
                PdfReaderContentParser parser = new PdfReaderContentParser(reader);
                // iText page numbers start at 1
                for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                    TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
                    String text = strategy.getResultantText();
                    XWPFParagraph p = doc.createParagraph();
                    XWPFRun run = p.createRun();
                    run.setText(text);
                    run.addBreak(BreakType.PAGE);
                }
            } finally {
                // the extracted text is already copied into the Word document at this point
                reader.close();
            }

            try (FileOutputStream out = new FileOutputStream("src/output/pdf.docx")) {
                doc.write(out);
            }
        }
    }
}
@@ -0,0 +1,77 @@
package com.baeldung.pdf;
import java.io.FileOutputStream;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.stream.Stream;
import com.itextpdf.text.BadElementException;
import com.itextpdf.text.BaseColor;
import com.itextpdf.text.Document;
import com.itextpdf.text.Element;
import com.itextpdf.text.Image;
import com.itextpdf.text.Phrase;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfWriter;
/**
 * Example that creates {@code iTextTable.pdf} containing a three-column table with a styled
 * header row, a plain text row and a row with an image and aligned cells.
 */
public class PDFSampleMain {

    /**
     * Builds the table and writes the PDF; any failure is printed to standard error.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // the output stream is a managed resource so it is released even when building
        // the table fails before the document is closed
        try (FileOutputStream out = new FileOutputStream("iTextTable.pdf")) {
            Document document = new Document();
            PdfWriter.getInstance(document, out);
            document.open();

            PdfPTable table = new PdfPTable(3);
            addTableHeader(table);
            addRows(table);
            addCustomRows(table);

            document.add(table);
            document.close();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Adds three header cells with a light gray background and a border width of 2.
     *
     * @param table the table to append to
     */
    private static void addTableHeader(PdfPTable table) {
        Stream.of("column header 1", "column header 2", "column header 3")
            .forEach(columnTitle -> {
                PdfPCell header = new PdfPCell();
                header.setBackgroundColor(BaseColor.LIGHT_GRAY);
                header.setBorderWidth(2);
                header.setPhrase(new Phrase(columnTitle));
                table.addCell(header);
            });
    }

    /**
     * Adds one row of three plain text cells.
     *
     * @param table the table to append to
     */
    private static void addRows(PdfPTable table) {
        table.addCell("row 1, col 1");
        table.addCell("row 1, col 2");
        table.addCell("row 1, col 3");
    }

    /**
     * Adds a row made of an image cell (classpath resource {@code Java_logo.png} scaled to 10%),
     * a horizontally centered cell and a bottom-aligned cell.
     *
     * @param table the table to append to
     * @throws URISyntaxException if the resource URL cannot be converted to a URI
     * @throws BadElementException if iText cannot handle the image
     * @throws IOException if the image is missing from the classpath or cannot be read
     */
    private static void addCustomRows(PdfPTable table) throws URISyntaxException, BadElementException, IOException {
        java.net.URL logo = ClassLoader.getSystemResource("Java_logo.png");
        if (logo == null) {
            // fail with a descriptive message instead of a NullPointerException on toURI()
            throw new IOException("Classpath resource not found: Java_logo.png");
        }
        Path path = Paths.get(logo.toURI());
        Image img = Image.getInstance(path.toAbsolutePath().toString());
        img.scalePercent(10);

        PdfPCell imageCell = new PdfPCell(img);
        table.addCell(imageCell);

        PdfPCell horizontalAlignCell = new PdfPCell(new Phrase("row 2, col 2"));
        horizontalAlignCell.setHorizontalAlignment(Element.ALIGN_CENTER);
        table.addCell(horizontalAlignCell);

        PdfPCell verticalAlignCell = new PdfPCell(new Phrase("row 2, col 3"));
        verticalAlignCell.setVerticalAlignment(Element.ALIGN_BOTTOM);
        table.addCell(verticalAlignCell);
    }
}
@@ -0,0 +1,56 @@
package com.baeldung.pdf.openpdf;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.commons.io.IOUtils;
import org.w3c.dom.Element;
import org.xhtmlrenderer.extend.FSImage;
import org.xhtmlrenderer.extend.ReplacedElement;
import org.xhtmlrenderer.extend.ReplacedElementFactory;
import org.xhtmlrenderer.extend.UserAgentCallback;
import org.xhtmlrenderer.layout.LayoutContext;
import org.xhtmlrenderer.pdf.ITextFSImage;
import org.xhtmlrenderer.pdf.ITextImageElement;
import org.xhtmlrenderer.render.BlockBox;
import org.xhtmlrenderer.simple.extend.FormSubmissionListener;
import com.lowagie.text.Image;
/**
 * {@link ReplacedElementFactory} that resolves {@code <img>} elements against
 * {@code src/main/resources/} and hands them to the renderer as image elements.
 * All other elements are left to the default handling by returning {@code null}.
 */
public class CustomElementFactoryImpl implements ReplacedElementFactory {

    /**
     * Creates the replaced element for an {@code img} box.
     *
     * @param lc layout context (unused)
     * @param box the box being laid out
     * @param uac user agent callback (unused)
     * @param cssWidth width from CSS; {@code -1} is treated as "not specified"
     * @param cssHeight height from CSS; {@code -1} is treated as "not specified"
     * @return the image element, or {@code null} when the box is not an {@code img} or the
     *         image could not be loaded
     */
    @Override
    public ReplacedElement createReplacedElement(LayoutContext lc, BlockBox box, UserAgentCallback uac, int cssWidth, int cssHeight) {
        Element e = box.getElement();
        if (e == null) {
            // boxes without a backing element cannot be images
            return null;
        }
        if (!"img".equals(e.getNodeName())) {
            return null;
        }

        // NOTE(review): the src attribute is appended to the resource directory unchecked;
        // only use this factory with trusted HTML
        String imagePath = e.getAttribute("src");
        try (InputStream input = new FileInputStream("src/main/resources/" + imagePath)) {
            byte[] bytes = IOUtils.toByteArray(input);
            Image image = Image.getInstance(bytes);
            FSImage fsImage = new ITextFSImage(image);
            if (cssWidth != -1 || cssHeight != -1) {
                fsImage.scale(cssWidth, cssHeight);
            } else {
                fsImage.scale(2000, 1000);
            }
            return new ITextImageElement(fsImage);
        } catch (Exception e1) {
            // best effort: a broken image must not abort the whole rendering
            e1.printStackTrace();
        }
        return null;
    }

    @Override
    public void reset() {
    }

    @Override
    public void remove(Element e) {
    }

    @Override
    public void setFormSubmissionListener(FormSubmissionListener listener) {
    }
}
@@ -0,0 +1,53 @@
package com.baeldung.pdf.openpdf;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.xhtmlrenderer.layout.SharedContext;
import org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
public class Html2PdfUsingFlyingSaucer {
private static final String HTML_INPUT = "src/main/resources/htmlforopenpdf.html";
private static final String PDF_OUTPUT = "src/main/resources/html2pdf.pdf";
public static void main(String[] args) {
try {
Html2PdfUsingFlyingSaucer htmlToPdf = new Html2PdfUsingFlyingSaucer();
htmlToPdf.generateHtmlToPdf();
} catch (Exception e) {
e.printStackTrace();
}
}
private void generateHtmlToPdf() throws Exception {
File inputHTML = new File(HTML_INPUT);
Document inputHtml = createWellFormedHtml(inputHTML);
File outputPdf = new File(PDF_OUTPUT);
xhtmlToPdf(inputHtml, outputPdf);
}
private Document createWellFormedHtml(File inputHTML) throws IOException {
Document document = Jsoup.parse(inputHTML, "UTF-8");
document.outputSettings()
.syntax(Document.OutputSettings.Syntax.xml);
return document;
}
private void xhtmlToPdf(Document xhtml, File outputPdf) throws Exception {
try (OutputStream outputStream = new FileOutputStream(outputPdf)) {
ITextRenderer renderer = new ITextRenderer();
SharedContext sharedContext = renderer.getSharedContext();
sharedContext.setPrint(true);
sharedContext.setInteractive(false);
sharedContext.setReplacedElementFactory(new CustomElementFactoryImpl());
renderer.setDocumentFromString(xhtml.html());
renderer.layout();
renderer.createPDF(outputStream);
}
}
}
@@ -0,0 +1,55 @@
package com.baeldung.pdf.openpdf;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.FileSystems;
import org.jsoup.Jsoup;
import org.jsoup.helper.W3CDom;
import org.jsoup.nodes.Document;
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
public class Html2PdfUsingOpenHtml {
private static final String HTML_INPUT = "src/main/resources/htmlforopenpdf.html";
private static final String PDF_OUTPUT = "src/main/resources/html2pdf.pdf";
public static void main(String[] args) {
try {
Html2PdfUsingOpenHtml htmlToPdf = new Html2PdfUsingOpenHtml();
htmlToPdf.generateHtmlToPdf();
} catch (Exception e) {
e.printStackTrace();
}
}
private void generateHtmlToPdf() throws IOException {
File inputHTML = new File(HTML_INPUT);
Document doc = createWellFormedHtml(inputHTML);
xhtmlToPdf(doc, PDF_OUTPUT);
}
private Document createWellFormedHtml(File inputHTML) throws IOException {
Document document = Jsoup.parse(inputHTML, "UTF-8");
document.outputSettings()
.syntax(Document.OutputSettings.Syntax.xml);
return document;
}
private void xhtmlToPdf(Document doc, String outputPdf) throws IOException {
try (OutputStream os = new FileOutputStream(outputPdf)) {
String baseUri = FileSystems.getDefault()
.getPath("src/main/resources/")
.toUri()
.toString();
PdfRendererBuilder builder = new PdfRendererBuilder();
builder.withUri(outputPdf);
builder.toStream(os);
builder.withW3cDocument(new W3CDom().fromJsoup(doc), baseUri);
builder.run();
}
}
}
@@ -0,0 +1,48 @@
package com.baeldung.pdfthymeleaf;
import com.lowagie.text.DocumentException;
import org.thymeleaf.TemplateEngine;
import org.thymeleaf.context.Context;
import org.thymeleaf.templatemode.TemplateMode;
import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
/**
 * Example that renders the Thymeleaf template {@code thymeleaf_template.html} to HTML and
 * converts the result to {@code thymeleaf.pdf} in the user's home directory.
 */
public class PDFThymeleafExample {

    /**
     * Processes the template and writes the PDF.
     *
     * @param args unused
     * @throws IOException if the PDF file cannot be written
     * @throws DocumentException if the renderer cannot create the PDF
     */
    public static void main(String[] args) throws IOException, DocumentException {
        PDFThymeleafExample thymeleaf2Pdf = new PDFThymeleafExample();
        String html = thymeleaf2Pdf.parseThymeleafTemplate();
        thymeleaf2Pdf.generatePdfFromHtml(html);
    }

    /**
     * Renders the given markup to {@code <user.home>/thymeleaf.pdf}.
     *
     * @param html the markup to render
     * @throws IOException if the output file cannot be opened or written
     * @throws DocumentException if the renderer cannot create the PDF
     */
    public void generatePdfFromHtml(String html) throws IOException, DocumentException {
        String outputFile = System.getProperty("user.home") + File.separator + "thymeleaf.pdf";
        // try-with-resources closes the stream even when layout or PDF creation throws
        try (OutputStream outputStream = new FileOutputStream(outputFile)) {
            ITextRenderer renderer = new ITextRenderer();
            renderer.setDocumentFromString(html);
            renderer.layout();
            renderer.createPDF(outputStream);
        }
    }

    /**
     * Processes the classpath template {@code thymeleaf_template} (suffix {@code .html},
     * HTML mode) with the variable {@code to} set to {@code "Baeldung.com"}.
     *
     * @return the rendered HTML
     */
    private String parseThymeleafTemplate() {
        ClassLoaderTemplateResolver templateResolver = new ClassLoaderTemplateResolver();
        templateResolver.setSuffix(".html");
        templateResolver.setTemplateMode(TemplateMode.HTML);

        TemplateEngine templateEngine = new TemplateEngine();
        templateEngine.setTemplateResolver(templateResolver);

        Context context = new Context();
        context.setVariable("to", "Baeldung.com");

        return templateEngine.process("thymeleaf_template", context);
    }
}
Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB

@@ -0,0 +1,53 @@
<!DOCTYPE html>
<html>
<head>
<title>A very simple webpage</title>
</head>
<body>
<h1>A very simple webpage. This is an "h1" level header.</h1>
<h2>This is a level h2 header.</h2>
<h6>This is a level h6 header. Pretty small!</h6>
<p>This is a standard paragraph.</p>
<p align=center>Now I've aligned it in the center of the screen.</p>
<p align=right>Now aligned to the right</p>
<p><b>Bold text</b></p>
<p><strong>Strongly emphasized text</strong> Can you tell the difference vs. bold?</p>
<p><i>Italics</i></p>
<p><em>Emphasized text</em> Just like Italics!</p>
<h2>How about a nice ordered list!</h2>
<ol>
<li>This little piggy went to market</li>
<li>This little piggy went to SB228 class</li>
<li>This little piggy went to an expensive restaurant in Downtown Palo Alto</li>
<li>This little piggy ate too much at Indian Buffet.</li>
<li>This little piggy got lost</li>
</ol>
<h2>Unordered list</h2>
<ul>
<li>First element</li>
<li>Second element</li>
<li>Third element</li>
</ul>
<p>And finally, how about some</p><a href="http://www.google.com/">Links?</a>
<p>Remember, you can view the HTML code from this or any other page by using the "View Page Source" command of your browser.</p>
</body>
</html>
@@ -0,0 +1,26 @@
<html>
<head>
<style>
.center_div {
border: 1px solid gray;
margin-left: auto;
margin-right: auto;
width: 90%;
background-color: #d0f0f6;
text-align: left;
padding: 8px;
}
</style>
<link href="style.css" rel="stylesheet">
</head>
<body>
<div class="center_div">
<h1>Hello Baeldung!</h1>
<img src="Java_logo.png">
<div class="myclass">
<p>This is the tutorial to convert html to pdf.</p>
</div>
</div>
</body>
</html>
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="STDOUT" />
</root>
</configuration>
@@ -0,0 +1,6 @@
.myclass{
font-family: Helvetica, sans-serif;
font-size:25;
font-weight: normal;
color: blue;
}
@@ -0,0 +1,7 @@
<html lang="en" xmlns:th="http://www.thymeleaf.org">
<body>
<h3 style="text-align: center; color: green">
<span th:text="'Welcome to ' + ${to} + '!'"></span>
</h3>
</body>
</html>
@@ -0,0 +1,3 @@
Test
Text
Test TEST
@@ -0,0 +1,84 @@
package com.baeldung.pdf.base64;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import static org.junit.Assert.*;
/**
 * Round-trip tests for Base64 encoding and decoding of a PDF file, using {@code java.util.Base64}
 * (array and stream variants) and Apache Commons Codec.
 */
public class EncodeDecodeUnitTest {

    private static final String IN_FILE = "src/test/resources/input.pdf";
    private static final String OUT_FILE = "src/test/resources/output.pdf";
    private static byte[] inFileBytes;

    /** Loads the input PDF once for all tests. */
    @BeforeClass
    public static void fileToByteArray() throws IOException {
        inFileBytes = Files.readAllBytes(Paths.get(IN_FILE));
    }

    @Test
    public void givenJavaBase64_whenEncoded_thenDecodedOK() throws IOException {
        byte[] encoded = java.util.Base64.getEncoder().encode(inFileBytes);
        byte[] decoded = java.util.Base64.getDecoder().decode(encoded);

        writeToFile(OUT_FILE, decoded);

        assertNotEquals(encoded.length, decoded.length);
        assertEquals(inFileBytes.length, decoded.length);
        // expected value first, so a failure message reports the right side as "actual"
        assertArrayEquals(inFileBytes, decoded);
    }

    @Test
    public void givenJavaBase64_whenEncodedStream_thenDecodedStreamOK() throws IOException {
        // stream the input file through an encoding wrapper into the output file
        try (OutputStream os = java.util.Base64.getEncoder().wrap(new FileOutputStream(OUT_FILE));
             FileInputStream fis = new FileInputStream(IN_FILE)) {
            byte[] bytes = new byte[1024];
            int read;
            while ((read = fis.read(bytes)) > -1) {
                os.write(bytes, 0, read);
            }
        }

        byte[] encoded = java.util.Base64.getEncoder().encode(inFileBytes);
        byte[] encodedOnDisk = Files.readAllBytes(Paths.get(OUT_FILE));
        assertArrayEquals(encoded, encodedOnDisk);

        byte[] decoded = java.util.Base64.getDecoder().decode(encoded);
        byte[] decodedOnDisk = java.util.Base64.getDecoder().decode(encodedOnDisk);
        assertArrayEquals(decoded, decodedOnDisk);
    }

    @Test
    public void givenApacheCommons_givenJavaBase64_whenEncoded_thenDecodedOK() throws IOException {
        byte[] encoded = org.apache.commons.codec.binary.Base64.encodeBase64(inFileBytes);
        byte[] decoded = org.apache.commons.codec.binary.Base64.decodeBase64(encoded);

        writeToFile(OUT_FILE, decoded);

        assertNotEquals(encoded.length, decoded.length);
        assertEquals(inFileBytes.length, decoded.length);
        assertArrayEquals(inFileBytes, decoded);
    }

    /**
     * Writes the bytes to the given file, creating it or replacing its content.
     *
     * @param fileName path of the file to write
     * @param bytes the content to store
     * @throws IOException if the file cannot be written
     */
    private void writeToFile(String fileName, byte[] bytes) throws IOException {
        // Files.write opens, truncates, writes and always closes the file
        Files.write(Paths.get(fileName), bytes);
    }
}
@@ -0,0 +1,52 @@
package com.baeldung.pdfreadertest;
import static org.junit.jupiter.api.Assertions.*;
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.Test;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
/**
 * Extracts the text of {@code sample.pdf} with Apache PDFBox and with iText and compares
 * it with the expected content.
 */
class ReadPdfFileUnitTest {

    /**
     * Reads the whole document with PDFBox's {@link PDFTextStripper}.
     *
     * @throws IOException if the PDF cannot be loaded or parsed
     */
    @Test
    public void givenSamplePdf_whenUsingApachePdfBox_thenCompareOutput() throws IOException {
        String expectedText = "Hello World!\n";
        File file = new File("sample.pdf");

        String text;
        // try-with-resources releases the document even when text extraction throws.
        try (PDDocument document = PDDocument.load(file)) {
            PDFTextStripper stripper = new PDFTextStripper();
            text = stripper.getText(document);
        }

        assertEquals(expectedText, text);
    }

    /**
     * Reads the document page by page with iText's {@link PdfTextExtractor}.
     *
     * @throws IOException if the PDF cannot be opened or parsed
     */
    @Test
    public void givenSamplePdf_whenUsingiTextPdf_thenCompareOutput() throws IOException {
        String expectedText = "Hello World!";
        StringBuilder text = new StringBuilder();

        PdfReader reader = new PdfReader("sample.pdf");
        try {
            int pages = reader.getNumberOfPages();
            // The loop starts at page 1 and runs through the reported page count.
            for (int i = 1; i <= pages; i++) {
                text.append(PdfTextExtractor.getTextFromPage(reader, i));
            }
        } finally {
            // Explicit finally: the reader is closed even if a page fails to parse.
            reader.close();
        }

        assertEquals(expectedText, text.toString());
    }
}
@@ -0,0 +1,52 @@
package com.baeldung.pdfthymeleaf;
import com.lowagie.text.DocumentException;
import org.junit.Test;
import org.thymeleaf.TemplateEngine;
import org.thymeleaf.context.Context;
import org.thymeleaf.templatemode.TemplateMode;
import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
import org.xhtmlrenderer.pdf.ITextRenderer;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import static org.junit.Assert.assertTrue;
/**
 * Renders a Thymeleaf template to HTML and converts that HTML into an in-memory PDF.
 */
public class PDFThymeleafUnitTest {

    /** The PDF produced from the processed template must contain at least one byte. */
    @Test
    public void givenThymeleafTemplate_whenParsedAndRenderedToPDF_thenItShouldNotBeEmpty() throws DocumentException, IOException {
        ByteArrayOutputStream pdf = generatePdfOutputStreamFromHtml(parseThymeleafTemplate());

        assertTrue(pdf.size() > 0);
    }

    /**
     * Lays out the given markup with {@link ITextRenderer} and writes the resulting PDF
     * into a byte buffer.
     *
     * @param html markup to render
     * @return the (already closed) buffer holding the PDF bytes
     */
    private ByteArrayOutputStream generatePdfOutputStreamFromHtml(String html) throws IOException, DocumentException {
        ITextRenderer pdfRenderer = new ITextRenderer();
        pdfRenderer.setDocumentFromString(html);
        pdfRenderer.layout();

        ByteArrayOutputStream pdfBytes = new ByteArrayOutputStream();
        pdfRenderer.createPDF(pdfBytes);
        pdfBytes.close();
        return pdfBytes;
    }

    /**
     * Processes the classpath template {@code thymeleaf_template} (suffix {@code .html})
     * with the variable {@code to} set to {@code "Baeldung.com"}.
     *
     * @return the template output as a string
     */
    private String parseThymeleafTemplate() {
        ClassLoaderTemplateResolver resolver = new ClassLoaderTemplateResolver();
        resolver.setTemplateMode(TemplateMode.HTML);
        resolver.setSuffix(".html");

        Context model = new Context();
        model.setVariable("to", "Baeldung.com");

        TemplateEngine engine = new TemplateEngine();
        engine.setTemplateResolver(resolver);
        return engine.process("thymeleaf_template", model);
    }
}
+24
View File
@@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Aggregator POM: "pom" packaging, it only groups the child modules listed below.
         No groupId/version is declared here, so both come from the parent. -->
    <artifactId>text-processing-libraries-modules</artifactId>
    <name>text-processing-libraries-modules</name>
    <packaging>pom</packaging>

    <parent>
        <groupId>com.baeldung</groupId>
        <artifactId>parent-modules</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>

    <!-- Child modules built as part of this aggregator. -->
    <modules>
        <module>antlr</module>
        <module>apache-tika</module>
        <module>asciidoctor</module>
        <module>pdf</module>
        <module>pdf-2</module>
    </modules>

</project>