JAVA-29281 Create new module Text Processing Libraries Modules (#15479)

2023-12-31 08:37:40 -08:00
parent 1768eee09f
commit 76bde3ff46
78 changed files with 208 additions and 189 deletions
@@ -0,0 +1,3 @@
+## Text Processing Libraries 
+
+This module contains submodules covering text processing libraries.
@@ -0,0 +1,7 @@
+## ANTLR
+
+This module contains articles about ANTLR
+
+### Relevant Articles: 
+
+- [Java with ANTLR](https://www.baeldung.com/java-antlr)
@@ -0,0 +1,63 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>antlr</artifactId>
+    <name>antlr</name>
+
+    <parent>
+        <groupId>com.baeldung</groupId>
+        <artifactId>text-processing-libraries-modules</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.antlr</groupId>
+            <artifactId>antlr4-runtime</artifactId>
+            <version>${antlr.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.antlr</groupId>
+                <artifactId>antlr4-maven-plugin</artifactId>
+                <version>${antlr.version}</version>
+                <executions>
+                    <execution>
+                        <goals>
+                            <goal>antlr4</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+                <version>${mojo.version}</version>
+                <executions>
+                    <execution>
+                        <phase>generate-sources</phase>
+                        <goals>
+                            <goal>add-source</goal>
+                        </goals>
+                        <configuration>
+                            <sources>
+                                <source>${basedir}/target/generated-sources/antlr4</source>
+                            </sources>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+
+    <properties>
+        <antlr.version>4.7.1</antlr.version>
+        <mojo.version>3.0.0</mojo.version>
+    </properties>
+
+</project>
@@ -0,0 +1,16 @@
+// Grammar for a simple line-oriented log format: "<date> <time> <level> <message>" per line.
+grammar Log;
+
+// Parser rules ---------------------------------------------------------------
+
+// A log is one or more entries.
+log : entry+;
+// An entry is a timestamp, a level and a message separated by single spaces, ended by a line break.
+entry : timestamp ' ' level ' ' message CRLF;
+// A timestamp is a DATE token and a TIME token separated by a single space.
+timestamp : DATE ' ' TIME;
+// Only these three literal levels are accepted.
+level : 'ERROR' | 'INFO' | 'DEBUG';
+// A message is any non-empty mix of TEXT tokens and single spaces.
+message : (TEXT | ' ')+;
+
+// Lexer fragments (building blocks, never emitted as tokens on their own) ----
+
+fragment DIGIT : [0-9];
+fragment TWODIGIT : DIGIT DIGIT;
+fragment LETTER : [A-Za-z];
+
+// Lexer tokens ---------------------------------------------------------------
+
+// Four digits, '-', three letters, '-', two digits (for example 2018-May-05).
+DATE : TWODIGIT TWODIGIT '-' LETTER LETTER LETTER '-' TWODIGIT;
+// Three two-digit groups separated by ':' (for example 14:20:21).
+TIME : TWODIGIT ':' TWODIGIT ':' TWODIGIT;
+// One or more letters; digits and punctuation are not part of TEXT.
+TEXT : LETTER+;
+// Line terminator: "\r\n", "\n" or a lone "\r".
+CRLF : '\r'? '\n' | '\r';
@@ -0,0 +1,28 @@
+package com.baeldung.antlr.java;
+
+import com.baeldung.antlr.Java8BaseListener;
+import com.baeldung.antlr.Java8Parser;
+import org.antlr.v4.runtime.tree.TerminalNode;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Parse-tree listener that reports every method whose name begins with an upper-case character.
+ *
+ * <p>Walk a parse tree with an instance of this class, then read the collected messages
+ * through {@link #getErrors()}.
+ */
+public class UppercaseMethodListener extends Java8BaseListener {
+
+    // Messages accumulate over the whole walk; the list reference itself never changes.
+    private final List<String> errors = new ArrayList<>();
+
+    /**
+     * Records an error message when the declared method name starts with an upper-case character.
+     *
+     * @param ctx the method declarator being entered
+     */
+    @Override
+    public void enterMethodDeclarator(Java8Parser.MethodDeclaratorContext ctx) {
+        TerminalNode node = ctx.Identifier();
+        if (node == null) {
+            // No identifier node is attached to this declarator (e.g. malformed input),
+            // so there is no name to check.
+            return;
+        }
+
+        String methodName = node.getText();
+        if (!methodName.isEmpty() && Character.isUpperCase(methodName.charAt(0))) {
+            errors.add(String.format("Method %s is uppercased!", methodName));
+        }
+    }
+
+    /**
+     * Returns the messages collected so far.
+     *
+     * @return a read-only view of the collected error messages
+     */
+    public List<String> getErrors() {
+        return Collections.unmodifiableList(errors);
+    }
+}
@@ -0,0 +1,51 @@
+package com.baeldung.antlr.log;
+
+import com.baeldung.antlr.LogBaseListener;
+import com.baeldung.antlr.LogParser;
+import com.baeldung.antlr.log.model.LogLevel;
+import com.baeldung.antlr.log.model.LogEntry;
+
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Listener that builds one {@link LogEntry} per {@code entry} node while a parsed log is walked.
+ *
+ * <p>A fresh entry is started when an {@code entry} node is entered, filled in as its
+ * timestamp, level and message nodes are entered, and stored when the {@code entry} node is left.
+ */
+public class LogListener extends LogBaseListener {
+
+    private static final DateTimeFormatter TIMESTAMP_FORMATTER
+            = DateTimeFormatter.ofPattern("yyyy-MMM-dd HH:mm:ss", Locale.ENGLISH);
+
+    private final List<LogEntry> collected = new ArrayList<>();
+    private LogEntry pending;
+
+    /** Starts a new, empty entry. */
+    @Override
+    public void enterEntry(LogParser.EntryContext ctx) {
+        pending = new LogEntry();
+    }
+
+    /** Parses the timestamp text and stores it on the entry under construction. */
+    @Override
+    public void enterTimestamp(LogParser.TimestampContext ctx) {
+        String rawTimestamp = ctx.getText();
+        LocalDateTime parsed = LocalDateTime.parse(rawTimestamp, TIMESTAMP_FORMATTER);
+        pending.setTimestamp(parsed);
+    }
+
+    /** Maps the level text onto the matching {@link LogLevel} constant. */
+    @Override
+    public void enterLevel(LogParser.LevelContext ctx) {
+        String rawLevel = ctx.getText();
+        pending.setLevel(LogLevel.valueOf(rawLevel));
+    }
+
+    /** Stores the message text on the entry under construction. */
+    @Override
+    public void enterMessage(LogParser.MessageContext ctx) {
+        String text = ctx.getText();
+        pending.setMessage(text);
+    }
+
+    /** Adds the entry under construction to the collected entries. */
+    @Override
+    public void exitEntry(LogParser.EntryContext ctx) {
+        collected.add(pending);
+    }
+
+    /**
+     * Returns the entries collected so far.
+     *
+     * @return a read-only view of the collected entries, in the order they were completed
+     */
+    public List<LogEntry> getEntries() {
+        return Collections.unmodifiableList(collected);
+    }
+}
@@ -0,0 +1,35 @@
+package com.baeldung.antlr.log.model;
+
+
+import java.time.LocalDateTime;
+
+/**
+ * Mutable holder for one log line: when it was written, how severe it is, and what it says.
+ */
+public class LogEntry {
+
+    private LocalDateTime timestamp;
+    private LogLevel level;
+    private String message;
+
+    /** @return the time the entry was logged, or {@code null} if not set */
+    public LocalDateTime getTimestamp() {
+        return this.timestamp;
+    }
+
+    /** @param timestamp the time the entry was logged */
+    public void setTimestamp(LocalDateTime timestamp) {
+        this.timestamp = timestamp;
+    }
+
+    /** @return the severity of the entry, or {@code null} if not set */
+    public LogLevel getLevel() {
+        return this.level;
+    }
+
+    /** @param level the severity of the entry */
+    public void setLevel(LogLevel level) {
+        this.level = level;
+    }
+
+    /** @return the message text of the entry, or {@code null} if not set */
+    public String getMessage() {
+        return this.message;
+    }
+
+    /** @param message the message text of the entry */
+    public void setMessage(String message) {
+        this.message = message;
+    }
+}
@@ -0,0 +1,5 @@
+package com.baeldung.antlr.log.model;
+
+public enum LogLevel {
+    DEBUG, INFO, ERROR
+}
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+            </pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+</configuration>
@@ -0,0 +1,30 @@
+package com.baeldung.antlr;
+
+import com.baeldung.antlr.java.UppercaseMethodListener;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.tree.ParseTree;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+import org.junit.Test;
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+/**
+ * Checks that {@link UppercaseMethodListener} flags a method whose name starts with an upper-case letter.
+ */
+public class JavaParserUnitTest {
+
+    @Test
+    public void whenOneMethodStartsWithUpperCase_thenOneErrorReturned() throws Exception {
+        // given: a class with a single, upper-cased method name
+        String source = "public class SampleClass { void DoSomething(){} }";
+        Java8Lexer lexer = new Java8Lexer(CharStreams.fromString(source));
+        Java8Parser parser = new Java8Parser(new CommonTokenStream(lexer));
+        ParseTree compilationUnit = parser.compilationUnit();
+
+        // when: the tree is walked with the listener under test
+        UppercaseMethodListener listener = new UppercaseMethodListener();
+        new ParseTreeWalker().walk(listener, compilationUnit);
+
+        // then: exactly one error is reported, naming the offending method
+        assertThat(listener.getErrors().size(), is(1));
+        assertThat(listener.getErrors().get(0),
+                is("Method DoSomething is uppercased!"));
+    }
+}
@@ -0,0 +1,36 @@
+package com.baeldung.antlr;
+
+import static org.hamcrest.CoreMatchers.is;
+import static org.hamcrest.MatcherAssert.assertThat;
+
+import com.baeldung.antlr.log.LogListener;
+import com.baeldung.antlr.log.model.LogLevel;
+import com.baeldung.antlr.log.model.LogEntry;
+import org.antlr.v4.runtime.CharStreams;
+import org.antlr.v4.runtime.CommonTokenStream;
+import org.antlr.v4.runtime.tree.ParseTreeWalker;
+import org.junit.Test;
+
+import java.time.LocalDateTime;
+
+
+public class LogParserUnitTest {
+
+    @Test
+    public void whenLogContainsOneErrorLogEntry_thenOneErrorIsReturned() throws Exception {
+        String logLines = "2018-May-05 14:20:21 DEBUG entering awesome method\r\n" +
+                "2018-May-05 14:20:24 ERROR Bad thing happened\r\n";
+        LogLexer serverLogLexer = new LogLexer(CharStreams.fromString(logLines));
+        CommonTokenStream tokens = new CommonTokenStream( serverLogLexer );
+        LogParser logParser = new LogParser(tokens);
+        ParseTreeWalker walker = new ParseTreeWalker();
+        LogListener logWalker = new LogListener();
+        walker.walk(logWalker, logParser.log());
+
+        assertThat(logWalker.getEntries().size(), is(2));
+        LogEntry error = logWalker.getEntries().get(1);
+        assertThat(error.getLevel(), is(LogLevel.ERROR));
+        assertThat(error.getMessage(), is("Bad thing happened"));
+        assertThat(error.getTimestamp(), is(LocalDateTime.of(2018,5,5,14,20,24)));
+    }
+}
@@ -0,0 +1,7 @@
+## Apache Tika
+
+This module contains articles about Apache Tika
+
+### Relevant articles:
+
+- [Content Analysis with Apache Tika](https://www.baeldung.com/apache-tika)
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>apache-tika</artifactId>
+    <version>0.0.1-SNAPSHOT</version>
+    <name>apache-tika</name>
+
+    <parent>
+        <groupId>com.baeldung</groupId>
+        <artifactId>text-processing-libraries-modules</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.tika</groupId>
+            <artifactId>tika-parsers</artifactId>
+            <version>${tika.version}</version>
+        </dependency>
+    </dependencies>
+
+    <properties>
+        <tika.version>1.17</tika.version>
+    </properties>
+
+</project>
@@ -0,0 +1,67 @@
+package com.baeldung.tika;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.Tika;
+import org.apache.tika.detect.DefaultDetector;
+import org.apache.tika.detect.Detector;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Static helpers showing two ways to use Apache Tika - the lower-level detector/parser
+ * classes and the {@link Tika} facade - for type detection, content extraction and
+ * metadata extraction. None of the methods closes the stream it is given.
+ */
+public class TikaAnalysis {
+
+    /**
+     * Detects the media type of a document with a {@link DefaultDetector}.
+     *
+     * @param stream the document content
+     * @return the detected media type as a string
+     * @throws IOException if the stream cannot be read
+     */
+    public static String detectDocTypeUsingDetector(InputStream stream) throws IOException {
+        Detector typeDetector = new DefaultDetector();
+        return typeDetector.detect(stream, new Metadata()).toString();
+    }
+
+    /**
+     * Detects the media type of a document with the {@link Tika} facade.
+     *
+     * @param stream the document content
+     * @return the detected media type as a string
+     * @throws IOException if the stream cannot be read
+     */
+    public static String detectDocTypeUsingFacade(InputStream stream) throws IOException {
+        return new Tika().detect(stream);
+    }
+
+    /**
+     * Extracts the body text of a document with an {@link AutoDetectParser}.
+     *
+     * @param stream the document content
+     * @return the text collected by the body content handler
+     * @throws IOException   if the stream cannot be read
+     * @throws TikaException if the document cannot be parsed
+     * @throws SAXException  if the content handler fails
+     */
+    public static String extractContentUsingParser(InputStream stream) throws IOException, TikaException, SAXException {
+        ContentHandler bodyHandler = new BodyContentHandler();
+        Parser autoParser = new AutoDetectParser();
+
+        autoParser.parse(stream, bodyHandler, new Metadata(), new ParseContext());
+        return bodyHandler.toString();
+    }
+
+    /**
+     * Extracts the text of a document with the {@link Tika} facade.
+     *
+     * @param stream the document content
+     * @return the extracted text
+     * @throws IOException   if the stream cannot be read
+     * @throws TikaException if the document cannot be parsed
+     */
+    public static String extractContentUsingFacade(InputStream stream) throws IOException, TikaException {
+        return new Tika().parseToString(stream);
+    }
+
+    /**
+     * Extracts the metadata of a document with an {@link AutoDetectParser}.
+     *
+     * @param stream the document content
+     * @return the metadata populated during parsing
+     * @throws IOException   if the stream cannot be read
+     * @throws SAXException  if the content handler fails
+     * @throws TikaException if the document cannot be parsed
+     */
+    public static Metadata extractMetadatatUsingParser(InputStream stream) throws IOException, SAXException, TikaException {
+        Metadata collected = new Metadata();
+        Parser autoParser = new AutoDetectParser();
+
+        // The body text is gathered by the handler but discarded; only the metadata is returned.
+        autoParser.parse(stream, new BodyContentHandler(), collected, new ParseContext());
+        return collected;
+    }
+
+    /**
+     * Extracts the metadata of a document with the {@link Tika} facade.
+     *
+     * @param stream the document content
+     * @return the metadata populated by the facade
+     * @throws IOException   if the stream cannot be read
+     * @throws TikaException declared for callers; see the facade for when it is raised
+     */
+    public static Metadata extractMetadatatUsingFacade(InputStream stream) throws IOException, TikaException {
+        Metadata collected = new Metadata();
+        new Tika().parse(stream, collected);
+        return collected;
+    }
+}
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+            </pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+</configuration>
@@ -0,0 +1,79 @@
+package com.baeldung.tika;
+
+import static org.hamcrest.CoreMatchers.containsString;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertThat;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+import org.xml.sax.SAXException;
+
+/**
+ * Exercises each {@link TikaAnalysis} helper against sample documents on the test classpath.
+ *
+ * <p>Every stream is opened in a try-with-resources block so it is closed even when an
+ * assertion fails or the helper under test throws.
+ */
+public class TikaUnitTest {
+    @Test
+    public void whenUsingDetector_thenDocumentTypeIsReturned() throws IOException {
+        try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.txt")) {
+            String mediaType = TikaAnalysis.detectDocTypeUsingDetector(stream);
+
+            // The expected type is PDF even though the resource is named with a .txt extension.
+            assertEquals("application/pdf", mediaType);
+        }
+    }
+
+    @Test
+    public void whenUsingFacade_thenDocumentTypeIsReturned() throws IOException {
+        try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.txt")) {
+            String mediaType = TikaAnalysis.detectDocTypeUsingFacade(stream);
+
+            // The expected type is PDF even though the resource is named with a .txt extension.
+            assertEquals("application/pdf", mediaType);
+        }
+    }
+
+    @Test
+    public void whenUsingParser_thenContentIsReturned() throws IOException, TikaException, SAXException {
+        try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.docx")) {
+            String content = TikaAnalysis.extractContentUsingParser(stream);
+
+            assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
+            assertThat(content, containsString("detects and extracts metadata and text"));
+        }
+    }
+
+    @Test
+    public void whenUsingFacade_thenContentIsReturned() throws IOException, TikaException {
+        try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.docx")) {
+            String content = TikaAnalysis.extractContentUsingFacade(stream);
+
+            assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
+            assertThat(content, containsString("detects and extracts metadata and text"));
+        }
+    }
+
+    @Test
+    public void whenUsingParser_thenMetadataIsReturned() throws IOException, TikaException, SAXException {
+        try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.xlsx")) {
+            Metadata metadata = TikaAnalysis.extractMetadatatUsingParser(stream);
+
+            assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
+            assertEquals("Microsoft Office User", metadata.get("Author"));
+        }
+    }
+
+    @Test
+    public void whenUsingFacade_thenMetadataIsReturned() throws IOException, TikaException {
+        try (InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.xlsx")) {
+            Metadata metadata = TikaAnalysis.extractMetadatatUsingFacade(stream);
+
+            assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
+            assertEquals("Microsoft Office User", metadata.get("Author"));
+        }
+    }
+}
@@ -0,0 +1,8 @@
+## Asciidoctor
+
+This module contains articles about Asciidoctor
+
+### Relevant articles:
+
+- [Generating a Book with Asciidoctor](https://www.baeldung.com/asciidoctor-book)
+- [Introduction to Asciidoctor in Java](https://www.baeldung.com/asciidoctor)
@@ -0,0 +1,71 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>asciidoctor</artifactId>
+    <name>asciidoctor</name>
+
+    <parent>
+        <groupId>com.baeldung</groupId>
+        <artifactId>text-processing-libraries-modules</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.asciidoctor</groupId>
+            <artifactId>asciidoctorj</artifactId>
+            <version>${asciidoctorj.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.asciidoctor</groupId>
+            <artifactId>asciidoctorj-pdf</artifactId>
+            <version>${asciidoctorj-pdf.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.asciidoctor</groupId>
+                <artifactId>asciidoctor-maven-plugin</artifactId>
+                <version>${asciidoctor-maven-plugin.version}</version>
+                <dependencies>
+                    <dependency>
+                        <groupId>org.asciidoctor</groupId>
+                        <artifactId>asciidoctorj-pdf</artifactId>
+                        <version>${asciidoctorj-pdf.plugin.version}</version>
+                    </dependency>
+                </dependencies>
+                <executions>
+                    <execution>
+                        <id>output-pdf</id>
+                        <phase>generate-resources</phase>
+                        <goals>
+                            <goal>process-asciidoc</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <sourceDirectory>src/docs/asciidoc</sourceDirectory>
+                    <outputDirectory>target/docs/asciidoc</outputDirectory>
+                    <attributes>
+                        <pdf-stylesdir>${project.basedir}/src/themes</pdf-stylesdir>
+                        <pdf-style>custom</pdf-style>
+                    </attributes>
+                    <backend>pdf</backend>
+                    <doctype>book</doctype>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+    <properties>
+        <asciidoctor-maven-plugin.version>2.2.2</asciidoctor-maven-plugin.version>
+        <asciidoctorj.version>2.5.7</asciidoctorj.version>
+        <asciidoctorj-pdf.version>2.3.4</asciidoctorj-pdf.version>
+        <asciidoctorj-pdf.plugin.version>2.3.4</asciidoctorj-pdf.plugin.version>
+    </properties>
+
+</project>
@@ -0,0 +1,13 @@
+:icons: font
+
+
+= Generating book with AsciiDoctorj
+Baeldung
+
+[abstract]
+This is the actual content.
+
+== First Section
+
+This is the first section of the book, where you can include some nice icons like icon:comment[].
+You can also create http://www.baeldung.com[links].
@@ -0,0 +1,33 @@
+package com.baeldung.asciidoctor;
+
+import static org.asciidoctor.Asciidoctor.Factory.create;
+import static org.asciidoctor.OptionsBuilder.options;
+
+import java.io.File;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.asciidoctor.Asciidoctor;
+
+/**
+ * Small demonstration of converting AsciiDoc content with AsciidoctorJ.
+ */
+public class AsciidoctorDemo {
+
+    private final Asciidoctor asciidoctor;
+
+    AsciidoctorDemo() {
+        asciidoctor = create();
+    }
+
+    /**
+     * Converts the file {@code sample.adoc} (resolved against the working directory) with the
+     * {@code pdf} backend.
+     *
+     * <p>NOTE(review): despite the method name, {@code input} is not used; the source is always
+     * {@code sample.adoc}. Kept as-is so existing callers see the same behaviour - confirm
+     * whether the string was meant to be the conversion source.
+     *
+     * @param input currently ignored
+     */
+    public void generatePDFFromString(final String input) {
+
+        final Map<String, Object> options = options().inPlace(true)
+          .backend("pdf")
+          .asMap();
+
+        // The returned value is not needed here; the conversion is run for its file output.
+        asciidoctor.convertFile(new File("sample.adoc"), options);
+    }
+
+    /**
+     * Converts the given AsciiDoc text with default options and returns the result.
+     *
+     * @param input the AsciiDoc source to convert
+     * @return the converted markup
+     */
+    String generateHTMLFromString(final String input) {
+        // Convert the caller's text rather than a hard-coded sample string.
+        return asciidoctor.convert(input, new HashMap<>());
+    }
+}
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+            </pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+</configuration>
@@ -0,0 +1,13 @@
+package com.baeldung.asciidoctor;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+/**
+ * Checks that {@link AsciidoctorDemo} converts a short AsciiDoc string to the expected HTML.
+ */
+public class AsciidoctorDemoIntegrationTest {
+
+    @Test
+    public void givenString_whenConverting_thenResultingHTMLCode() {
+        final AsciidoctorDemo asciidoctorDemo = new AsciidoctorDemo();
+        final String expectedHtml = "<div class=\"paragraph\">\n<p>Hello <em>Baeldung</em>!</p>\n</div>";
+
+        // JUnit's contract is assertEquals(expected, actual); the order matters for failure messages.
+        Assert.assertEquals(expectedHtml, asciidoctorDemo.generateHTMLFromString("Hello _Baeldung_!"));
+    }
+}
@@ -0,0 +1,29 @@
+title_page:
+  align: left
+
+page:
+  layout: portrait
+  margin: [0.75in, 1in, 0.75in, 1in]
+  size: A4
+base:
+  font_color: #333333
+  line_height_length: 17
+  line_height: $base_line_height_length / $base_font_size
+link:
+  font_color: #009900
+
+header:
+  height: 0.5in
+  line_height: 1
+  recto_content:
+    center: '{document-title}'
+  verso_content:
+    center: '{document-title}'
+
+footer:
+  height: 0.5in
+  line_height: 1
+  recto_content:
+    right: '{chapter-title} | *{page-number}*'
+  verso_content:
+    left: '*{page-number}* | {chapter-title}'
@@ -0,0 +1,3 @@
+## Relevant articles
+- [Editing Existing PDF Files in Java](https://www.baeldung.com/java-edit-existing-pdf)
+- [Get Information About a PDF in Java](https://www.baeldung.com/java-pdf-info)
@@ -0,0 +1,75 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>pdf-2</artifactId>
+    <name>pdf-2</name>
+    <url>http://maven.apache.org</url>
+
+    <parent>
+        <groupId>com.baeldung</groupId>
+        <artifactId>text-processing-libraries-modules</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>com.itextpdf</groupId>
+            <artifactId>itext7-core</artifactId>
+            <version>${itextpdf.core.version}</version>
+            <type>pom</type>
+        </dependency>
+        <dependency>
+            <groupId>com.itextpdf</groupId>
+            <artifactId>cleanup</artifactId>
+            <version>${itextpdf.cleanup.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.itextpdf</groupId>
+            <artifactId>itextpdf</artifactId>
+            <version>${itextpdf.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox</artifactId>
+            <version>${pdfbox.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>${poi-ooxml.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-api</artifactId>
+            <version>${log4j-api.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.logging.log4j</groupId>
+            <artifactId>log4j-core</artifactId>
+            <version>${log4j-core.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <finalName>pdf-2</finalName>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+            </resource>
+        </resources>
+    </build>
+
+    <properties>
+        <itextpdf.version>5.5.13.3</itextpdf.version>
+        <itextpdf.core.version>7.2.3</itextpdf.core.version>
+        <itextpdf.cleanup.version>3.0.1</itextpdf.cleanup.version>
+        <pdfbox.version>3.0.0</pdfbox.version>
+        <poi-ooxml.version>5.2.5</poi-ooxml.version>
+        <log4j-api.version>2.20.0</log4j-api.version>
+        <log4j-core.version>2.20.0</log4j-core.version>
+    </properties>
+
+</project>
@@ -0,0 +1,242 @@
+package com.baeldung.exceltopdf;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.math.BigDecimal;
+import java.util.Iterator;
+
+import org.apache.poi.ss.usermodel.CellStyle;
+import org.apache.poi.ss.usermodel.HorizontalAlignment;
+import org.apache.poi.ss.usermodel.IndexedColors;
+import org.apache.poi.ss.usermodel.VerticalAlignment;
+import org.apache.poi.xssf.usermodel.XSSFColor;
+import org.apache.poi.xssf.usermodel.XSSFFont;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Cell;
+
+import com.itextpdf.text.BaseColor;
+import com.itextpdf.text.Document;
+import com.itextpdf.text.DocumentException;
+import com.itextpdf.text.Element;
+import com.itextpdf.text.Font;
+import com.itextpdf.text.FontFactory;
+import com.itextpdf.text.Paragraph;
+import com.itextpdf.text.Phrase;
+import com.itextpdf.text.pdf.PdfPCell;
+import com.itextpdf.text.pdf.PdfPTable;
+import com.itextpdf.text.pdf.PdfWriter;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+
+public class ExcelToPDFConverter {
+
+    private static final Logger logger = LogManager.getLogger(ExcelToPDFConverter.class);
+
+    public static XSSFWorkbook readExcelFile(String excelFilePath) throws IOException {
+        FileInputStream inputStream = new FileInputStream(excelFilePath);
+        XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
+        inputStream.close();
+        return workbook;
+    }
+
+    private static Document createPDFDocument(String pdfFilePath) throws IOException, DocumentException {
+        Document document = new Document();
+        PdfWriter.getInstance(document, new FileOutputStream(pdfFilePath));
+        document.open();
+        return document;
+    }
+
+    public static void convertExcelToPDF(String excelFilePath, String pdfFilePath) throws IOException, DocumentException {
+        XSSFWorkbook workbook = readExcelFile(excelFilePath);
+        Document document = createPDFDocument(pdfFilePath);
+
+        for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
+            XSSFSheet worksheet = workbook.getSheetAt(i);
+
+            // Add header with sheet name as title
+            Paragraph title = new Paragraph(worksheet.getSheetName(), new Font(Font.FontFamily.HELVETICA, 18, Font.BOLD));
+            title.setSpacingAfter(20f);
+            title.setAlignment(Element.ALIGN_CENTER);
+            document.add(title);
+
+            createAndAddTable(worksheet, document);
+            // Add a new page for each sheet (except the last one)
+            if (i < workbook.getNumberOfSheets() - 1) {
+                document.newPage();
+            }
+        }
+
+        document.close();
+        workbook.close();
+    }
+
+    private static void createAndAddTable(XSSFSheet worksheet, Document document) throws DocumentException, IOException {
+        PdfPTable table = new PdfPTable(worksheet.getRow(0)
+            .getPhysicalNumberOfCells());
+        table.setWidthPercentage(100);
+        addTableHeader(worksheet, table);
+        addTableData(worksheet, table);
+        document.add(table);
+    }
+
+    private static void addTableHeader(XSSFSheet worksheet, PdfPTable table) throws DocumentException, IOException {
+        Row headerRow = worksheet.getRow(0);
+        for (int i = 0; i < headerRow.getPhysicalNumberOfCells(); i++) {
+            Cell cell = headerRow.getCell(i);
+            String headerText = getCellText(cell);
+            PdfPCell headerCell = new PdfPCell(new Phrase(headerText, getCellStyle(cell)));
+            setBackgroundColor(cell, headerCell);
+            setCellAlignment(cell, headerCell);
+            table.addCell(headerCell);
+        }
+    }
+
+    public static String getCellText(Cell cell) {
+        String cellValue;
+        switch (cell.getCellType()) {
+        case STRING:
+            cellValue = cell.getStringCellValue();
+            break;
+        case NUMERIC:
+            cellValue = String.valueOf(BigDecimal.valueOf(cell.getNumericCellValue()));
+            break;
+        case BLANK:
+        default:
+            cellValue = "";
+            break;
+        }
+        return cellValue;
+    }
+
+    private static void addTableData(XSSFSheet worksheet, PdfPTable table) throws DocumentException, IOException {
+        Iterator<Row> rowIterator = worksheet.iterator();
+        while (rowIterator.hasNext()) {
+            Row row = rowIterator.next();
+            if (row.getRowNum() == 0) {
+                continue;
+            }
+            for (int i = 0; i < row.getPhysicalNumberOfCells(); i++) {
+                Cell cell = row.getCell(i);
+                String cellValue = getCellText(cell);
+                PdfPCell cellPdf = new PdfPCell(new Phrase(cellValue, getCellStyle(cell)));
+                setBackgroundColor(cell, cellPdf);
+                setCellAlignment(cell, cellPdf);
+                table.addCell(cellPdf);
+            }
+        }
+    }
+
+    private static void setBackgroundColor(Cell cell, PdfPCell cellPdf) {
+        // Set background color
+        short bgColorIndex = cell.getCellStyle()
+            .getFillForegroundColor();
+        if (bgColorIndex != IndexedColors.AUTOMATIC.getIndex()) {
+            XSSFColor bgColor = (XSSFColor) cell.getCellStyle()
+                .getFillForegroundColorColor();
+            if (bgColor != null) {
+                byte[] rgb = bgColor.getRGB();
+                if (rgb != null && rgb.length == 3) {
+                    cellPdf.setBackgroundColor(new BaseColor(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF));
+                }
+            }
+        }
+    }
+
+    private static void setCellAlignment(Cell cell, PdfPCell cellPdf) {
+        CellStyle cellStyle = cell.getCellStyle();
+
+        HorizontalAlignment horizontalAlignment = cellStyle.getAlignment();
+        VerticalAlignment verticalAlignment = cellStyle.getVerticalAlignment();
+
+        switch (horizontalAlignment) {
+        case LEFT:
+            cellPdf.setHorizontalAlignment(Element.ALIGN_LEFT);
+            break;
+        case CENTER:
+            cellPdf.setHorizontalAlignment(Element.ALIGN_CENTER);
+            break;
+        case JUSTIFY:
+        case FILL:
+            cellPdf.setVerticalAlignment(Element.ALIGN_JUSTIFIED);
+            break;
+        case RIGHT:
+            cellPdf.setHorizontalAlignment(Element.ALIGN_RIGHT);
+            break;
+        }
+
+        switch (verticalAlignment) {
+        case TOP:
+            cellPdf.setVerticalAlignment(Element.ALIGN_TOP);
+            break;
+        case CENTER:
+            cellPdf.setVerticalAlignment(Element.ALIGN_MIDDLE);
+            break;
+        case JUSTIFY:
+            cellPdf.setVerticalAlignment(Element.ALIGN_JUSTIFIED);
+            break;
+        case BOTTOM:
+            cellPdf.setVerticalAlignment(Element.ALIGN_BOTTOM);
+            break;
+        }
+    }
+
+    private static Font getCellStyle(Cell cell) throws DocumentException, IOException {
+        Font font = new Font();
+        CellStyle cellStyle = cell.getCellStyle();
+        org.apache.poi.ss.usermodel.Font cellFont = cell.getSheet()
+            .getWorkbook()
+            .getFontAt(cellStyle.getFontIndexAsInt());
+
+        short fontColorIndex = cellFont.getColor();
+        if (fontColorIndex != IndexedColors.AUTOMATIC.getIndex() && cellFont instanceof XSSFFont) {
+            XSSFColor fontColor = ((XSSFFont) cellFont).getXSSFColor();
+            if (fontColor != null) {
+                byte[] rgb = fontColor.getRGB();
+                if (rgb != null && rgb.length == 3) {
+                    font.setColor(new BaseColor(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF));
+                }
+            }
+        }
+
+        if (cellFont.getItalic()) {
+            font.setStyle(Font.ITALIC);
+        }
+
+        if (cellFont.getStrikeout()) {
+            font.setStyle(Font.STRIKETHRU);
+        }
+
+        if (cellFont.getUnderline() == 1) {
+            font.setStyle(Font.UNDERLINE);
+        }
+
+        short fontSize = cellFont.getFontHeightInPoints();
+        font.setSize(fontSize);
+
+        if (cellFont.getBold()) {
+            font.setStyle(Font.BOLD);
+        }
+
+        String fontName = cellFont.getFontName();
+        if (FontFactory.isRegistered(fontName)) {
+            font.setFamily(fontName); // Use extracted font family if supported by iText
+        } else {
+            logger.warn("Unsupported font type: {}", fontName);
+            // - Use a fallback font (e.g., Helvetica)
+            font.setFamily("Helvetica");
+        }
+
+        return font;
+    }
+
+    public static void main(String[] args) throws DocumentException, IOException {
+        String excelFilePath = "src/main/resources/excelsample.xlsx";
+        String pdfFilePath = "src/main/resources/pdfsample.pdf";
+        convertExcelToPDF(excelFilePath, pdfFilePath);
+    }
+}
@@ -0,0 +1,43 @@
+package com.baeldung.pdfedition;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import com.itextpdf.kernel.geom.Rectangle;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.pdfcleanup.CleanUpProperties;
+import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
+import com.itextpdf.pdfcleanup.PdfCleanUpTool;
+import com.itextpdf.pdfcleanup.PdfCleaner;
+import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
+import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
+
+/**
+ * Removes content from an existing PDF: every match of a text pattern plus two fixed rectangular areas.
+ */
+public class PdfContentRemover {
+
+    private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
+    private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf";
+
+    /**
+     * Reads {@code SOURCE}, strips the configured content and writes the result to {@code DESTINATION}.
+     *
+     * @throws IOException if the source cannot be read or the destination cannot be written
+     */
+    public static void main(String[] args) throws IOException {
+        PdfReader reader = new PdfReader(SOURCE);
+        PdfWriter writer = new PdfWriter(DESTINATION);
+        // try-with-resources closes the document even when the clean-up throws
+        try (PdfDocument pdfDocument = new PdfDocument(reader, writer)) {
+            removeContentFromDocument(pdfDocument);
+        }
+    }
+
+    /**
+     * Applies the text-based and the area-based clean-up to the given document.
+     *
+     * @param pdfDocument the open document to clean; it is not closed by this method
+     */
+    private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException {
+        // 5.1. remove text
+        CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
+        strategy.add(new RegexBasedCleanupStrategy("Baeldung"));
+        PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
+
+        // 5.2. remove other areas
+        List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(
+            new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)),
+            new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35)));
+        PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties());
+        cleaner.cleanUp();
+    }
+
+}
@@ -0,0 +1,86 @@
+package com.baeldung.pdfedition;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+
+import com.itextpdf.forms.PdfAcroForm;
+import com.itextpdf.forms.fields.PdfFormField;
+import com.itextpdf.forms.fields.PdfTextFormField;
+import com.itextpdf.io.image.ImageData;
+import com.itextpdf.io.image.ImageDataFactory;
+import com.itextpdf.kernel.geom.Rectangle;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.kernel.pdf.PdfString;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
+import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
+import com.itextpdf.layout.Document;
+import com.itextpdf.layout.element.Image;
+import com.itextpdf.layout.element.Paragraph;
+import com.itextpdf.layout.element.Table;
+import com.itextpdf.layout.element.Text;
+import com.itextpdf.layout.properties.UnitValue;
+
+/**
+ * Adds a form field, a page, an annotation, an image, a paragraph and a table to an existing PDF.
+ */
+public class PdfEditor {
+
+    private static final String SOURCE = "src/main/resources/baeldung.pdf";
+    private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf";
+
+    /**
+     * Reads {@code SOURCE}, adds the demo content and writes the result to {@code DESTINATION}.
+     *
+     * @throws IOException if the source cannot be read or the destination cannot be written
+     */
+    public static void main(String[] args) throws IOException {
+        PdfReader reader = new PdfReader(SOURCE);
+        PdfWriter writer = new PdfWriter(DESTINATION);
+        PdfDocument pdfDocument = new PdfDocument(reader, writer);
+        try {
+            addContentToDocument(pdfDocument);
+        } finally {
+            // on success the helper has already closed the document; this only
+            // releases it when the helper failed before reaching its close() call
+            if (!pdfDocument.isClosed()) {
+                pdfDocument.close();
+            }
+        }
+    }
+
+    /**
+     * Adds all demo elements to the document and closes it.
+     *
+     * @param pdfDocument the open document to modify; closed when this method returns normally
+     * @throws MalformedURLException if the image location cannot be turned into a URL
+     */
+    private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException {
+        // 4.1. add form
+        PdfFormField personal = PdfFormField.createEmptyField(pdfDocument);
+        personal.setFieldName("information");
+        PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", "");
+        personal.addKid(name);
+        PdfAcroForm.getAcroForm(pdfDocument, true)
+            .addField(personal, pdfDocument.getFirstPage());
+
+        // 4.2. add new page
+        pdfDocument.addNewPage(1);
+
+        // 4.3. add annotation
+        PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name"))
+            .setContents("Your name");
+        pdfDocument.getPage(2)
+            .addAnnotation(ann);
+
+        // create document from pdf document
+        Document document = new Document(pdfDocument);
+
+        // 4.4. add an image
+        ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png");
+        Image image = new Image(imageData).scaleAbsolute(550, 100)
+            .setFixedPosition(1, 10, 50);
+        document.add(image);
+
+        // 4.5. add a paragraph
+        Text title = new Text("This is a demo").setFontSize(16);
+        Text author = new Text("Baeldung tutorials.");
+        Paragraph p = new Paragraph().setFontSize(8)
+            .add(title)
+            .add(" from ")
+            .add(author);
+        document.add(p);
+
+        // 4.6. add a table
+        Table table = new Table(UnitValue.createPercentArray(2));
+        table.addHeaderCell("#");
+        table.addHeaderCell("company");
+        table.addCell("name");
+        table.addCell("baeldung");
+        document.add(table);
+
+        // close the document
+        // this automatically closes the pdfDocument, which then closes automatically the pdfReader and pdfWriter
+        document.close();
+    }
+
+}
@@ -0,0 +1,45 @@
+package com.baeldung.pdfedition;
+
+import java.io.IOException;
+
+import com.itextpdf.kernel.colors.ColorConstants;
+import com.itextpdf.kernel.pdf.PdfDocument;
+import com.itextpdf.kernel.pdf.PdfPage;
+import com.itextpdf.kernel.pdf.PdfReader;
+import com.itextpdf.kernel.pdf.PdfWriter;
+import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
+import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
+import com.itextpdf.layout.Canvas;
+import com.itextpdf.layout.element.Paragraph;
+import com.itextpdf.pdfcleanup.PdfCleaner;
+import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
+import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
+
+/**
+ * Replaces every occurrence of a text pattern in a PDF by blanking it out and drawing new text on top.
+ */
+public class PdfTextReplacement {
+
+    private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
+    private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf";
+
+    /**
+     * Reads {@code SOURCE}, replaces the matched text and writes the result to {@code DESTINATION}.
+     *
+     * @throws IOException if the source cannot be read or the destination cannot be written
+     */
+    public static void main(String[] args) throws IOException {
+        PdfReader reader = new PdfReader(SOURCE);
+        PdfWriter writer = new PdfWriter(DESTINATION);
+        // try-with-resources closes the document even when the replacement throws
+        try (PdfDocument pdfDocument = new PdfDocument(reader, writer)) {
+            replaceTextContentFromDocument(pdfDocument);
+        }
+    }
+
+    /**
+     * Redacts each match of "Baeldung tutorials" in white and writes "HIDDEN" into the freed rectangle.
+     *
+     * @param pdfDocument the open document to modify; it is not closed by this method
+     */
+    private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException {
+        CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
+        strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE));
+        PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
+
+        for (IPdfTextLocation location : strategy.getResultantLocations()) {
+            // NOTE(review): the + 1 presumably maps a zero-based location page number
+            // to the one-based index getPage expects - confirm
+            PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1);
+            PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument());
+            // the layout canvas is closed once its content has been added
+            try (Canvas canvas = new Canvas(pdfCanvas, location.getRectangle())) {
+                canvas.add(new Paragraph("HIDDEN").setFontSize(8)
+                    .setMarginTop(0f));
+            }
+        }
+    }
+
+}
@@ -0,0 +1,31 @@
+package com.baeldung.pdfinfo;
+
+
+import com.itextpdf.text.pdf.PdfReader;
+
+import java.io.IOException;
+import java.util.Map;
+
+/**
+ * Reads basic facts about a PDF file (page count, encryption flag, info dictionary) using iText.
+ */
+public class PdfInfoIText {
+
+    /**
+     * Returns the number of pages of the given PDF.
+     *
+     * @param pdfFile location of the PDF to inspect
+     * @return the page count reported by the reader
+     * @throws IOException if the file cannot be opened or parsed
+     */
+    public static int getNumberOfPages(final String pdfFile) throws IOException {
+        PdfReader reader = new PdfReader(pdfFile);
+        try {
+            return reader.getNumberOfPages();
+        } finally {
+            // release the reader even if the query above fails
+            reader.close();
+        }
+    }
+
+    /**
+     * Tells whether the given PDF is encrypted.
+     *
+     * @param pdfFile location of the PDF to inspect
+     * @return {@code true} if the reader reports the document as encrypted
+     * @throws IOException if the file cannot be opened or parsed
+     */
+    public static boolean isPasswordRequired(final String pdfFile) throws IOException {
+        PdfReader reader = new PdfReader(pdfFile);
+        try {
+            return reader.isEncrypted();
+        } finally {
+            reader.close();
+        }
+    }
+
+    /**
+     * Returns the document information entries of the given PDF.
+     *
+     * @param pdfFile location of the PDF to inspect
+     * @return the info entries as returned by the reader
+     * @throws IOException if the file cannot be opened or parsed
+     */
+    public static Map<String, String> getInfo(final String pdfFile) throws IOException {
+        PdfReader reader = new PdfReader(pdfFile);
+        try {
+            return reader.getInfo();
+        } finally {
+            reader.close();
+        }
+    }
+}
@@ -0,0 +1,36 @@
+package com.baeldung.pdfinfo;
+
+
+import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
+
+import java.io.File;
+import java.io.IOException;
+
+/**
+ * Reads basic facts about a PDF file (page count, encryption flag, document information) using PDFBox.
+ */
+public class PdfInfoPdfBox {
+
+    /**
+     * Returns the number of pages of the given PDF.
+     *
+     * @param pdfFile path of the PDF to inspect
+     * @return the page count reported by the document
+     * @throws IOException if the file cannot be loaded
+     */
+    public static int getNumberOfPages(final String pdfFile) throws IOException {
+        // try-with-resources closes the document even if the query fails
+        try (PDDocument document = Loader.loadPDF(new File(pdfFile))) {
+            return document.getNumberOfPages();
+        }
+    }
+
+    /**
+     * Tells whether the given PDF is encrypted.
+     *
+     * @param pdfFile path of the PDF to inspect
+     * @return {@code true} if the document reports itself as encrypted
+     * @throws IOException if the file cannot be loaded
+     */
+    public static boolean isPasswordRequired(final String pdfFile) throws IOException {
+        try (PDDocument document = Loader.loadPDF(new File(pdfFile))) {
+            return document.isEncrypted();
+        }
+    }
+
+    /**
+     * Returns the document information of the given PDF.
+     * <p>
+     * The returned object is handed out after the document has been closed, as before.
+     *
+     * @param pdfFile path of the PDF to inspect
+     * @return the document information object
+     * @throws IOException if the file cannot be loaded
+     */
+    public static PDDocumentInformation getInfo(final String pdfFile) throws IOException {
+        try (PDDocument document = Loader.loadPDF(new File(pdfFile))) {
+            return document.getDocumentInformation();
+        }
+    }
+}
@@ -0,0 +1,43 @@
+%PDF-1.4
+%âãÏÓ
+3 0 obj
+<</Length 751/Filter/FlateDecode>>stream
+xœ•WËRÛ0Ýû+î’.Puõ´–@Ÿ”R žÎtéi¤Íc0d:ü}%K&a¢«�daœãsïÑ‘tdªÓ¦’jn ™VXæ,îÞBÀšÛêèG?íz¸˜=>ÁU{×¾kþTÇéqÚyÞðçhøe¸çpŒj¨ÿ±©®«‡Š3'œBàL9.ü�@ý]�b}säŠ�ú®º-<ŽŠ³ÃBi&bHåX}CiÅT††.àÜûñÙ³5üË
+w’ üÐF4?ŒÍKÑ¼¼É–$îägW�&Iš$h’Dq
+'GÈìJ¢Ð(‰B£$
+�’H®ojGÈ¯ú]Iš$h’D IÅõMµ-¹D I�&Iš$Q\ßÔï�‚$M’4I"Ð$‰âú¦øb`fySh’D I�&I;èuH_Ÿ þ‹`-ãaK;h1zUˆ^3\×5„+:órõ9st¹b!i9Ü…dÝª…F±ÚV,Fü×ùŠ"T”`•|{Å³õãÓjÑõcÅ—Z¶fJ�êŠl]^�¬5“õa#lVOí<_N9ôq~X¹«v6¥Ä�EŒ• *
+Xà!ãÄ¬)òu‹©^3O�û3¤lÈ_ºïÉrúíšp@8Æu‰¯
+«ó\Í³%ê/Ò'ã4EeŸ
+Ìè“(ùT`ŸwËå3|Y}*ðýêv´OÒ–¸—´Q–$É=F�Ì�9£dÑ(ºïÅzö?)›ÛV�‰�›mÀ÷úD·.ø¤ER{|¢™Ñ'Uô‰fë–¼*/(í^;åQfâ›¥DéýaOH¦K{° ¢°ÇPÏìùW–ÅßoÂÛk=˜ƒmÒÁ¢BçÕªáfÎ=ÿŠ‹þ‘]“é^ÑdbxÉdšý½]®»9Lœ¶7gNÞkºŒÙrjÁ`$²yƒÁ>W7û›²Át¯h°)þóË³ÏîÛ~>ëà†íœç‘o“ß÷};Ý7´\©‘Li*hêöTüˆ
+endstream
+endobj
+5 0 obj
+<</Type/Page/MediaBox[0 0 595 842]/Resources<</Font<</F1 1 0 R/F2 2 0 R>>>>/Contents 3 0 R/Parent 4 0 R>>
+endobj
+1 0 obj
+<</Type/Font/Subtype/Type1/BaseFont/Helvetica-Bold/Encoding/WinAnsiEncoding>>
+endobj
+2 0 obj
+<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
+endobj
+4 0 obj
+<</Type/Pages/Count 1/Kids[5 0 R]>>
+endobj
+6 0 obj
+<</Type/Catalog/Pages 4 0 R>>
+endobj
+7 0 obj
+<</Producer(iText® 5.5.13.3 ©2000-2022 iText Group NV \(AGPL-version\))/CreationDate(D:20231213174247+08'00')/ModDate(D:20231213174247+08'00')>>
+endobj
+xref
+0 8
+0000000000 65535 f 
+0000000954 00000 n 
+0000001047 00000 n 
+0000000015 00000 n 
+0000001135 00000 n 
+0000000833 00000 n 
+0000001186 00000 n 
+0000001231 00000 n 
+trailer
+<</Size 8/Root 6 0 R/Info 7 0 R/ID [<6a28b1036b62f3808f3bfb62a88a5239><6a28b1036b62f3808f3bfb62a88a5239>]>>
@@ -0,0 +1,31 @@
+package com.baeldung.pdfinfo;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.junit.jupiter.api.Test;
+
+class PdfInfoITextUnitTest {
+
+    private static final String PDF_FILE = "src/test/resources/input.pdf";
+
+    @Test
+    void givenPdf_whenGetNumberOfPages_thenOK() throws IOException {
+        assertEquals(4, PdfInfoIText.getNumberOfPages(PDF_FILE));
+    }
+
+    @Test
+    void givenPdf_whenIsPasswordRequired_thenOK() throws IOException {
+        assertFalse(PdfInfoIText.isPasswordRequired(PDF_FILE));
+    }
+
+    @Test
+    void givenPdf_whenGetInfo_thenOK() throws IOException {
+        Map<String, String> info = PdfInfoIText.getInfo(PDF_FILE);
+        assertEquals("LibreOffice 4.2", info.get("Producer"));
+        assertEquals("Writer", info.get("Creator"));
+    }
+}
@@ -0,0 +1,31 @@
+package com.baeldung.pdfinfo;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
+import org.junit.jupiter.api.Test;
+
+import java.io.IOException;
+
+class PdfInfoPdfBoxUnitTest {
+
+    private static final String PDF_FILE = "src/test/resources/input.pdf";
+
+    @Test
+    void givenPdf_whenGetNumberOfPages_thenOK() throws IOException {
+        assertEquals(4, PdfInfoPdfBox.getNumberOfPages(PDF_FILE));
+    }
+
+    @Test
+    void givenPdf_whenIsPasswordRequired_thenOK() throws IOException {
+        assertFalse(PdfInfoPdfBox.isPasswordRequired(PDF_FILE));
+    }
+
+    @Test
+    void givenPdf_whenGetInfo_thenOK() throws IOException {
+        PDDocumentInformation info = PdfInfoPdfBox.getInfo(PDF_FILE);
+        assertEquals("LibreOffice 4.2", info.getProducer());
+        assertEquals("Writer", info.getCreator());
+    }
+}
@@ -0,0 +1 @@
+/target/
@@ -0,0 +1,11 @@
+## PDF
+
+This module contains articles about PDF files.
+
+### Relevant Articles:
+- [PDF Conversions in Java](https://www.baeldung.com/pdf-conversions-java)
+- [Creating PDF Files in Java](https://www.baeldung.com/java-pdf-creation)
+- [Generating PDF Files Using Thymeleaf](https://www.baeldung.com/thymeleaf-generate-pdf)
+- [Java Convert PDF to Base64](https://www.baeldung.com/java-convert-pdf-to-base64)
+- [HTML to PDF Using OpenPDF](https://www.baeldung.com/java-html-to-pdf)
+- [Reading PDF File Using Java](https://www.baeldung.com/java-pdf-file-read)
@@ -0,0 +1,122 @@
+<?xml version="1.0"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>pdf</artifactId>
+    <name>pdf</name>
+    <url>http://maven.apache.org</url>
+
+    <parent>
+        <groupId>com.baeldung</groupId>
+        <artifactId>text-processing-libraries-modules</artifactId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.pdfbox</groupId>
+            <artifactId>pdfbox-tools</artifactId>
+            <version>${pdfbox-tools.version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>commons-logging</artifactId>
+                    <groupId>commons-logging</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>net.sf.cssbox</groupId>
+            <artifactId>pdf2dom</artifactId>
+            <version>${pdf2dom.version}</version>
+            <exclusions>
+                <exclusion>
+                    <artifactId>commons-logging</artifactId>
+                    <groupId>commons-logging</groupId>
+                </exclusion>
+            </exclusions>
+        </dependency>
+        <dependency>
+            <groupId>com.itextpdf</groupId>
+            <artifactId>itextpdf</artifactId>
+            <version>${itextpdf.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.itextpdf.tool</groupId>
+            <artifactId>xmlworker</artifactId>
+            <version>${xmlworker.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-scratchpad</artifactId>
+            <version>${poi-scratchpad.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.xmlgraphics</groupId>
+            <artifactId>batik-transcoder</artifactId>
+            <version>${batik-transcoder.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>${poi-ooxml.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.thymeleaf</groupId>
+            <artifactId>thymeleaf</artifactId>
+            <version>${thymeleaf.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.xhtmlrenderer</groupId>
+            <artifactId>flying-saucer-pdf</artifactId>
+            <version>${flying-saucer-pdf.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.xhtmlrenderer</groupId>
+            <artifactId>flying-saucer-pdf-openpdf</artifactId>
+            <version>${flying-saucer-pdf-openpdf.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>${jsoup.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.openhtmltopdf</groupId>
+            <artifactId>openhtmltopdf-core</artifactId>
+            <version>${open-html-pdf-core.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>com.openhtmltopdf</groupId>
+            <artifactId>openhtmltopdf-pdfbox</artifactId>
+            <version>${open-html-pdfbox.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <finalName>pdf</finalName>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+            </resource>
+        </resources>
+    </build>
+
+    <properties>
+        <pdfbox-tools.version>3.0.0</pdfbox-tools.version>
+        <pdf2dom.version>2.0.1</pdf2dom.version>
+        <itextpdf.version>5.5.13.3</itextpdf.version>
+        <xmlworker.version>5.5.10</xmlworker.version>
+        <poi-scratchpad.version>3.15</poi-scratchpad.version>
+        <batik-transcoder.version>1.8</batik-transcoder.version>
+        <poi-ooxml.version>3.15</poi-ooxml.version>
+        <thymeleaf.version>3.1.2.RELEASE</thymeleaf.version>
+        <flying-saucer-pdf.version>9.3.1</flying-saucer-pdf.version>
+        <open-html-pdfbox.version>1.0.6</open-html-pdfbox.version>
+        <open-html-pdf-core.version>1.0.10</open-html-pdf-core.version>
+        <flying-saucer-pdf-openpdf.version>9.2.1</flying-saucer-pdf-openpdf.version>
+        <jsoup.version>1.16.2</jsoup.version>
+    </properties>
+
+</project>
@@ -0,0 +1,35 @@
+package com.baeldung.pdf;
+
+import com.itextpdf.text.Document;
+import com.itextpdf.text.DocumentException;
+import com.itextpdf.text.Paragraph;
+import com.itextpdf.text.pdf.PdfWriter;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.List;
+
+/**
+ * Converts the paragraphs of a DOCX file into a plain PDF, one PDF paragraph per Word paragraph.
+ */
+public class DocxToPDFExample {
+
+    /**
+     * Reads {@code input.docx} from the working directory and writes its paragraph text to {@code output.pdf}.
+     *
+     * @throws IOException if the input cannot be read or the output cannot be written
+     * @throws DocumentException if iText rejects the content
+     */
+    public static void main(String[] args) throws IOException, DocumentException {
+        // the input stream is part of the resource list so it is closed as well,
+        // including when opening the Word document fails
+        try (InputStream docxInputStream = new FileInputStream("input.docx");
+            XWPFDocument document = new XWPFDocument(docxInputStream);
+            OutputStream pdfOutputStream = new FileOutputStream("output.pdf")) {
+            Document pdfDocument = new Document();
+            PdfWriter.getInstance(pdfDocument, pdfOutputStream);
+            pdfDocument.open();
+
+            List<XWPFParagraph> paragraphs = document.getParagraphs();
+            for (XWPFParagraph paragraph : paragraphs) {
+                pdfDocument.add(new Paragraph(paragraph.getText()));
+            }
+            pdfDocument.close();
+        }
+    }
+}
@@ -0,0 +1,52 @@
+package com.baeldung.pdf;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.io.Writer;
+
+import javax.xml.parsers.ParserConfigurationException;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.fit.pdfdom.PDFDomTree;
+
+import com.itextpdf.text.Document;
+import com.itextpdf.text.DocumentException;
+import com.itextpdf.text.pdf.PdfWriter;
+import com.itextpdf.tool.xml.XMLWorkerHelper;
+
+public class PDF2HTMLExample {
+
+	private static final String PDF = "src/main/resources/pdf.pdf";
+	private static final String HTML = "src/main/resources/html.html";
+
+	public static void main(String[] args) {
+		try {
+			generateHTMLFromPDF(PDF);
+			generatePDFFromHTML(HTML);
+		} catch (IOException | ParserConfigurationException | DocumentException e) {
+			e.printStackTrace();
+		}
+	}
+
+	private static void generateHTMLFromPDF(String filename) throws ParserConfigurationException, IOException {
+		PDDocument pdf = PDDocument.load(new File(filename));
+		PDFDomTree parser = new PDFDomTree();
+		Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
+		parser.writeText(pdf, output);
+		output.close();
+		if (pdf != null) {
+			pdf.close();
+		}
+	}
+
+	private static void generatePDFFromHTML(String filename) throws ParserConfigurationException, IOException, DocumentException {
+		Document document = new Document();
+		PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream("src/output/html.pdf"));
+		document.open();
+		XMLWorkerHelper.getInstance().parseXHtml(writer, document, new FileInputStream(filename));
+		document.close();
+	}
+}
@@ -0,0 +1,62 @@
+package com.baeldung.pdf;
+
+import java.awt.image.BufferedImage;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.URL;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.ImageType;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.apache.pdfbox.tools.imageio.ImageIOUtil;
+
+import com.itextpdf.text.BadElementException;
+import com.itextpdf.text.Document;
+import com.itextpdf.text.DocumentException;
+import com.itextpdf.text.Image;
+import com.itextpdf.text.pdf.PdfWriter;
+
+/**
+ * Demonstrates rendering PDF pages to image files (PDFBox) and embedding remote images into PDFs (iText).
+ */
+public class PDF2ImageExample {
+
+	private static final String PDF = "src/main/resources/pdf.pdf";
+	private static final String JPG = "http://cdn2.baeldung.netdna-cdn.com/wp-content/uploads/2016/05/baeldung-rest-widget-main-1.2.0";
+	private static final String GIF = "https://media.giphy.com/media/l3V0x6kdXUW9M4ONq/giphy";
+
+	/**
+	 * Runs all conversions; failures are printed to standard error.
+	 */
+	public static void main(String[] args) {
+		try {
+			generateImageFromPDF(PDF, "png");
+			generateImageFromPDF(PDF, "jpeg");
+			generateImageFromPDF(PDF, "gif");
+			generatePDFFromImage(JPG, "jpg");
+			generatePDFFromImage(GIF, "gif");
+		} catch (IOException | DocumentException e) {
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Renders every page of the PDF at 300 DPI into {@code src/output/pdf-<page>.<extension>}.
+	 *
+	 * @param filename path of the PDF to render
+	 * @param extension image format, also used as file extension
+	 */
+	private static void generateImageFromPDF(String filename, String extension) throws IOException {
+		// try-with-resources closes the document even when rendering or writing fails
+		try (PDDocument document = PDDocument.load(new File(filename))) {
+			PDFRenderer pdfRenderer = new PDFRenderer(document);
+			for (int page = 0; page < document.getNumberOfPages(); ++page) {
+				BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
+				ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
+			}
+		}
+	}
+
+	/**
+	 * Downloads the image at {@code filename + "." + extension} and stores it in
+	 * {@code src/output/<extension>.pdf}.
+	 *
+	 * @param filename image URL without extension
+	 * @param extension image extension, also used as the output file name
+	 */
+	private static void generatePDFFromImage(String filename, String extension)
+			throws IOException, BadElementException, DocumentException {
+		Document document = new Document();
+		String input = filename + "." + extension;
+		String output = "src/output/" + extension + ".pdf";
+		// the stream is closed even when fetching or embedding the image fails
+		try (FileOutputStream fos = new FileOutputStream(output)) {
+			PdfWriter writer = PdfWriter.getInstance(document, fos);
+			writer.open();
+			document.open();
+			document.add(Image.getInstance((new URL(input))));
+			document.close();
+			writer.close();
+		}
+	}
+
+}
@@ -0,0 +1,84 @@
+package com.baeldung.pdf;
+
+import java.io.BufferedReader;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileReader;
+import java.io.IOException;
+import java.io.PrintWriter;
+
+import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.io.RandomAccessFile;
+import org.apache.pdfbox.pdfparser.PDFParser;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+
+import com.itextpdf.text.Document;
+import com.itextpdf.text.DocumentException;
+import com.itextpdf.text.Element;
+import com.itextpdf.text.Font;
+import com.itextpdf.text.PageSize;
+import com.itextpdf.text.Paragraph;
+import com.itextpdf.text.pdf.PdfWriter;
+
+public class PDF2TextExample {
+
+	private static final String PDF = "src/main/resources/pdf.pdf";
+	private static final String TXT = "src/main/resources/txt.txt";
+
+	public static void main(String[] args) {
+		try {
+			generateTxtFromPDF(PDF);
+			generatePDFFromTxt(TXT);
+		} catch (IOException | DocumentException e) {
+			e.printStackTrace();
+		}
+	}
+
+	private static void generateTxtFromPDF(String filename) throws IOException {
+		File f = new File(filename);
+		String parsedText;
+		PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
+		parser.parse();
+
+		COSDocument cosDoc = parser.getDocument();
+
+		PDFTextStripper pdfStripper = new PDFTextStripper();
+		PDDocument pdDoc = new PDDocument(cosDoc);
+
+		parsedText = pdfStripper.getText(pdDoc);
+
+		if (cosDoc != null)
+			cosDoc.close();
+		if (pdDoc != null)
+			pdDoc.close();
+
+		PrintWriter pw = new PrintWriter("src/output/pdf.txt");
+		pw.print(parsedText);
+		pw.close();
+	}
+
+	private static void generatePDFFromTxt(String filename) throws IOException, DocumentException {
+		Document pdfDoc = new Document(PageSize.A4);
+		PdfWriter.getInstance(pdfDoc, new FileOutputStream("src/output/txt.pdf"))
+				.setPdfVersion(PdfWriter.PDF_VERSION_1_7);
+		pdfDoc.open();
+		
+		Font myfont = new Font();
+		myfont.setStyle(Font.NORMAL);
+		myfont.setSize(11);
+		pdfDoc.add(new Paragraph("\n"));
+		
+		BufferedReader br = new BufferedReader(new FileReader(filename));
+		String strLine;
+		while ((strLine = br.readLine()) != null) {
+			Paragraph para = new Paragraph(strLine + "\n", myfont);
+			para.setAlignment(Element.ALIGN_JUSTIFIED);
+			pdfDoc.add(para);
+		}
+		
+		pdfDoc.close();
+		br.close();
+	}
+
+}
@@ -0,0 +1,50 @@
+package com.baeldung.pdf;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+import org.apache.poi.xwpf.usermodel.BreakType;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
+import org.apache.poi.xwpf.usermodel.XWPFRun;
+
+import com.itextpdf.text.pdf.PdfReader;
+import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
+import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
+import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
+
+/**
+ * Demonstrates converting the text of a PDF into a Word document, one page-broken paragraph per PDF page.
+ */
+public class PDF2WordExample {
+
+	private static final String FILENAME = "src/main/resources/pdf.pdf";
+
+	/**
+	 * Runs the conversion; failures are printed to standard error.
+	 */
+	public static void main(String[] args) {
+		try {
+			generateDocFromPDF(FILENAME);
+		} catch (IOException e) {
+			e.printStackTrace();
+		}
+	}
+
+	/**
+	 * Extracts the text of every page of the given PDF and stores it in {@code src/output/pdf.docx}.
+	 *
+	 * @param filename path of the PDF to read
+	 */
+	private static void generateDocFromPDF(String filename) throws IOException {
+		try (XWPFDocument doc = new XWPFDocument()) {
+			PdfReader reader = new PdfReader(filename);
+			try {
+				PdfReaderContentParser parser = new PdfReaderContentParser(reader);
+
+				// PDF pages are addressed starting at 1
+				for (int i = 1; i <= reader.getNumberOfPages(); i++) {
+					TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
+					String text = strategy.getResultantText();
+					XWPFParagraph p = doc.createParagraph();
+					XWPFRun run = p.createRun();
+					run.setText(text);
+					run.addBreak(BreakType.PAGE);
+				}
+				// the output file is only created once extraction has succeeded
+				try (FileOutputStream out = new FileOutputStream("src/output/pdf.docx")) {
+					doc.write(out);
+				}
+			} finally {
+				// release the reader even when extraction or writing fails
+				reader.close();
+			}
+		}
+	}
+
+}
@@ -0,0 +1,77 @@
+package com.baeldung.pdf;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.net.URISyntaxException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.stream.Stream;
+
+import com.itextpdf.text.BadElementException;
+import com.itextpdf.text.BaseColor;
+import com.itextpdf.text.Document;
+import com.itextpdf.text.Element;
+import com.itextpdf.text.Image;
+import com.itextpdf.text.Phrase;
+import com.itextpdf.text.pdf.PdfPCell;
+import com.itextpdf.text.pdf.PdfPTable;
+import com.itextpdf.text.pdf.PdfWriter;
+
+public class PDFSampleMain {
+
+    public static void main(String[] args) {
+        
+        try {
+            
+            Document document = new Document();
+            PdfWriter.getInstance(document, new FileOutputStream("iTextTable.pdf"));
+
+            document.open();
+
+            PdfPTable table = new PdfPTable(3);
+            addTableHeader(table);
+            addRows(table);
+            addCustomRows(table);
+
+            document.add(table);
+            document.close();
+            
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    private static void addTableHeader(PdfPTable table) {
+        Stream.of("column header 1", "column header 2", "column header 3")
+        .forEach(columnTitle -> {
+            PdfPCell header = new PdfPCell();
+            header.setBackgroundColor(BaseColor.LIGHT_GRAY);
+            header.setBorderWidth(2);
+            header.setPhrase(new Phrase(columnTitle));
+            table.addCell(header);
+        });
+    }
+
+    private static void addRows(PdfPTable table) {
+        table.addCell("row 1, col 1");
+        table.addCell("row 1, col 2");
+        table.addCell("row 1, col 3");
+    }
+
+    private static void addCustomRows(PdfPTable table) throws URISyntaxException, BadElementException, IOException {
+        Path path = Paths.get(ClassLoader.getSystemResource("Java_logo.png").toURI());
+        Image img = Image.getInstance(path.toAbsolutePath().toString());
+        img.scalePercent(10);
+
+        PdfPCell imageCell = new PdfPCell(img);
+        table.addCell(imageCell);
+
+        PdfPCell horizontalAlignCell = new PdfPCell(new Phrase("row 2, col 2"));
+        horizontalAlignCell.setHorizontalAlignment(Element.ALIGN_CENTER);
+        table.addCell(horizontalAlignCell);
+
+        PdfPCell verticalAlignCell = new PdfPCell(new Phrase("row 2, col 3"));
+        verticalAlignCell.setVerticalAlignment(Element.ALIGN_BOTTOM);
+        table.addCell(verticalAlignCell);
+    }
+}
@@ -0,0 +1,56 @@
+package com.baeldung.pdf.openpdf;
+
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.commons.io.IOUtils;
+import org.w3c.dom.Element;
+import org.xhtmlrenderer.extend.FSImage;
+import org.xhtmlrenderer.extend.ReplacedElement;
+import org.xhtmlrenderer.extend.ReplacedElementFactory;
+import org.xhtmlrenderer.extend.UserAgentCallback;
+import org.xhtmlrenderer.layout.LayoutContext;
+import org.xhtmlrenderer.pdf.ITextFSImage;
+import org.xhtmlrenderer.pdf.ITextImageElement;
+import org.xhtmlrenderer.render.BlockBox;
+import org.xhtmlrenderer.simple.extend.FormSubmissionListener;
+
+import com.lowagie.text.Image;
+
+/**
+ * Replaced-element factory that resolves {@code img} elements against files below
+ * {@code src/main/resources/} and hands them to the renderer as image elements.
+ */
+public class CustomElementFactoryImpl implements ReplacedElementFactory {
+    /**
+     * Creates an image element for {@code img} nodes.
+     *
+     * @return the image element, or {@code null} when the box has no element, is not an
+     *         {@code img}, or the image could not be loaded (the failure is printed to standard error)
+     */
+    @Override
+    public ReplacedElement createReplacedElement(LayoutContext lc, BlockBox box, UserAgentCallback uac, int cssWidth, int cssHeight) {
+        Element e = box.getElement();
+        // guard against boxes without a backing DOM element instead of failing with an NPE
+        if (e == null || !"img".equals(e.getNodeName())) {
+            return null;
+        }
+
+        String imagePath = e.getAttribute("src");
+        // try-with-resources releases the file handle once the bytes have been read
+        try (InputStream input = new FileInputStream("src/main/resources/" + imagePath)) {
+            byte[] bytes = IOUtils.toByteArray(input);
+            Image image = Image.getInstance(bytes);
+            FSImage fsImage = new ITextFSImage(image);
+            if (cssWidth != -1 || cssHeight != -1) {
+                fsImage.scale(cssWidth, cssHeight);
+            } else {
+                // no CSS size given: fall back to a fixed size
+                fsImage.scale(2000, 1000);
+            }
+            return new ITextImageElement(fsImage);
+        } catch (Exception e1) {
+            // best effort: an unreadable image is reported and the element is left unreplaced
+            e1.printStackTrace();
+        }
+        return null;
+    }
+
+    @Override
+    public void reset() {
+    }
+
+    @Override
+    public void remove(Element e) {
+    }
+
+    @Override
+    public void setFormSubmissionListener(FormSubmissionListener listener) {
+    }
+}
@@ -0,0 +1,53 @@
+package com.baeldung.pdf.openpdf;
+
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.xhtmlrenderer.layout.SharedContext;
+import org.xhtmlrenderer.pdf.ITextRenderer;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Converts {@code htmlforopenpdf.html} into a PDF using Jsoup for clean-up and
+ * Flying Saucer's {@link ITextRenderer} for rendering.
+ */
+public class Html2PdfUsingFlyingSaucer {
+
+    private static final String HTML_INPUT = "src/main/resources/htmlforopenpdf.html";
+    private static final String PDF_OUTPUT = "src/main/resources/html2pdf.pdf";
+
+    /** Runs the conversion and prints the stack trace of any failure. */
+    public static void main(String[] args) {
+        try {
+            new Html2PdfUsingFlyingSaucer().generateHtmlToPdf();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** Parses the input HTML, then renders it to the output PDF file. */
+    private void generateHtmlToPdf() throws Exception {
+        Document wellFormed = createWellFormedHtml(new File(HTML_INPUT));
+        xhtmlToPdf(wellFormed, new File(PDF_OUTPUT));
+    }
+
+    /**
+     * Parses the given file as UTF-8 and switches the document's output syntax to XML.
+     *
+     * @param inputHTML HTML file to read
+     * @return the parsed document
+     * @throws IOException if the file cannot be read
+     */
+    private Document createWellFormedHtml(File inputHTML) throws IOException {
+        Document parsed = Jsoup.parse(inputHTML, "UTF-8");
+        Document.OutputSettings settings = parsed.outputSettings();
+        settings.syntax(Document.OutputSettings.Syntax.xml);
+        return parsed;
+    }
+
+    /**
+     * Renders the document into {@code outputPdf}, resolving replaced elements through
+     * {@link CustomElementFactoryImpl}.
+     *
+     * @param xhtml     document to render
+     * @param outputPdf destination file
+     */
+    private void xhtmlToPdf(Document xhtml, File outputPdf) throws Exception {
+        try (OutputStream pdfStream = new FileOutputStream(outputPdf)) {
+            ITextRenderer pdfRenderer = new ITextRenderer();
+
+            SharedContext context = pdfRenderer.getSharedContext();
+            context.setPrint(true);
+            context.setInteractive(false);
+            context.setReplacedElementFactory(new CustomElementFactoryImpl());
+
+            String markup = xhtml.html();
+            pdfRenderer.setDocumentFromString(markup);
+            pdfRenderer.layout();
+            pdfRenderer.createPDF(pdfStream);
+        }
+    }
+}
@@ -0,0 +1,55 @@
+package com.baeldung.pdf.openpdf;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.file.FileSystems;
+
+import org.jsoup.Jsoup;
+import org.jsoup.helper.W3CDom;
+import org.jsoup.nodes.Document;
+
+import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
+
+/**
+ * Converts {@code htmlforopenpdf.html} into a PDF using Jsoup for clean-up and
+ * Open HTML to PDF's {@link PdfRendererBuilder} for rendering.
+ */
+public class Html2PdfUsingOpenHtml {
+
+    private static final String HTML_INPUT = "src/main/resources/htmlforopenpdf.html";
+    private static final String PDF_OUTPUT = "src/main/resources/html2pdf.pdf";
+
+    /** Runs the conversion and prints the stack trace of any failure. */
+    public static void main(String[] args) {
+        try {
+            new Html2PdfUsingOpenHtml().generateHtmlToPdf();
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /** Parses the input HTML, then renders it to the output PDF path. */
+    private void generateHtmlToPdf() throws IOException {
+        Document wellFormed = createWellFormedHtml(new File(HTML_INPUT));
+        xhtmlToPdf(wellFormed, PDF_OUTPUT);
+    }
+
+    /**
+     * Parses the given file as UTF-8 and switches the document's output syntax to XML.
+     *
+     * @param inputHTML HTML file to read
+     * @return the parsed document
+     * @throws IOException if the file cannot be read
+     */
+    private Document createWellFormedHtml(File inputHTML) throws IOException {
+        Document parsed = Jsoup.parse(inputHTML, "UTF-8");
+        Document.OutputSettings settings = parsed.outputSettings();
+        settings.syntax(Document.OutputSettings.Syntax.xml);
+        return parsed;
+    }
+
+    /**
+     * Renders the document to the file at {@code outputPdf}; relative resources are resolved
+     * against the {@code src/main/resources/} directory.
+     *
+     * @param doc       document to render
+     * @param outputPdf path of the PDF file to write
+     * @throws IOException if the output cannot be written
+     */
+    private void xhtmlToPdf(Document doc, String outputPdf) throws IOException {
+        try (OutputStream pdfStream = new FileOutputStream(outputPdf)) {
+            java.nio.file.Path resourceDir = FileSystems.getDefault()
+                .getPath("src/main/resources/");
+            String baseUri = resourceDir.toUri()
+                .toString();
+
+            PdfRendererBuilder pdfBuilder = new PdfRendererBuilder();
+            // NOTE(review): this passes the output path as the document URI; the W3C document
+            // set below is presumably what actually gets rendered - confirm before relying on it.
+            pdfBuilder.withUri(outputPdf);
+            pdfBuilder.toStream(pdfStream);
+            pdfBuilder.withW3cDocument(new W3CDom().fromJsoup(doc), baseUri);
+            pdfBuilder.run();
+        }
+    }
+}
@@ -0,0 +1,48 @@
+package com.baeldung.pdfthymeleaf;
+
+import com.lowagie.text.DocumentException;
+import org.thymeleaf.TemplateEngine;
+import org.thymeleaf.context.Context;
+import org.thymeleaf.templatemode.TemplateMode;
+import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
+import org.xhtmlrenderer.pdf.ITextRenderer;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+
+/**
+ * Renders the {@code thymeleaf_template} template to HTML and converts the result to a PDF
+ * with Flying Saucer's {@link ITextRenderer}.
+ */
+public class PDFThymeleafExample {
+
+    public static void main(String[] args) throws IOException, DocumentException {
+        PDFThymeleafExample thymeleaf2Pdf = new PDFThymeleafExample();
+        String html = thymeleaf2Pdf.parseThymeleafTemplate();
+        thymeleaf2Pdf.generatePdfFromHtml(html);
+    }
+
+    /**
+     * Renders the given markup to {@code thymeleaf.pdf} in the current user's home directory.
+     *
+     * @param html markup to render
+     * @throws IOException       if the PDF file cannot be opened or written
+     * @throws DocumentException if the renderer fails to create the PDF
+     */
+    public void generatePdfFromHtml(String html) throws IOException, DocumentException {
+        String outputFile = System.getProperty("user.home") + File.separator + "thymeleaf.pdf";
+
+        // try-with-resources closes the file even when layout or PDF creation throws.
+        try (OutputStream outputStream = new FileOutputStream(outputFile)) {
+            ITextRenderer renderer = new ITextRenderer();
+            renderer.setDocumentFromString(html);
+            renderer.layout();
+            renderer.createPDF(outputStream);
+        }
+    }
+
+    /**
+     * Processes the {@code thymeleaf_template} template (resolved with an {@code .html} suffix
+     * through the class loader) with the variable {@code to} set to {@code "Baeldung.com"}.
+     *
+     * @return the rendered HTML
+     */
+    private String parseThymeleafTemplate() {
+        ClassLoaderTemplateResolver templateResolver = new ClassLoaderTemplateResolver();
+        templateResolver.setSuffix(".html");
+        templateResolver.setTemplateMode(TemplateMode.HTML);
+
+        TemplateEngine templateEngine = new TemplateEngine();
+        templateEngine.setTemplateResolver(templateResolver);
+
+        Context context = new Context();
+        context.setVariable("to", "Baeldung.com");
+
+        return templateEngine.process("thymeleaf_template", context);
+    }
+}
@@ -0,0 +1,53 @@
+<!DOCTYPE html>
+<html>
+
+<head>
+<title>A very simple webpage</title>
+</head>
+
+<body>
+<h1>A very simple webpage. This is an "h1" level header.</h1>
+
+<h2>This is a level h2 header.</h2>
+
+<h6>This is a level h6 header.  Pretty small!</h6>
+
+<p>This is a standard paragraph.</p>
+
+<p align=center>Now I've aligned it in the center of the screen.</p>
+
+<p align=right>Now aligned to the right</p>
+
+<p><b>Bold text</b></p>
+
+<p><strong>Strongly emphasized text</strong>  Can you tell the difference vs. bold?</p>
+
+<p><i>Italics</i></p>
+
+<p><em>Emphasized text</em>  Just like Italics!</p>
+
+<h2>How about a nice ordered list!</h2>
+<ol>
+  <li>This little piggy went to market</li>
+  <li>This little piggy went to SB228 class</li>
+  <li>This little piggy went to an expensive restaurant in Downtown Palo Alto</li>
+  <li>This little piggy ate too much at Indian Buffet.</li>
+  <li>This little piggy got lost</li>
+</ol>
+
+<h2>Unordered list</h2>
+<ul>
+  <li>First element</li>
+  <li>Second element</li>
+  <li>Third element</li>
+</ul>
+
+
+<p>And finally, how about some</p><a href="http://www.google.com/">Links?</a>
+
+<p>Remember, you can view the HTML code from this or any other page by using the "View Page Source" command of your browser.</p>
+
+</body>
+
+</html>
+
@@ -0,0 +1,26 @@
+<html>
+    <head>
+        <style>
+            .center_div {
+                border: 1px solid gray;
+                margin-left: auto;
+                margin-right: auto;
+                width: 90%;
+                background-color: #d0f0f6;
+                text-align: left;
+                padding: 8px;
+            }
+        </style>
+        <link href="style.css" rel="stylesheet">
+    </head>
+    <body>
+        <div class="center_div">
+            <h1>Hello Baeldung!</h1>
+            <img src="Java_logo.png">
+        
+            <div class="myclass">
+                <p>This is the tutorial to convert html to pdf.</p>
+            </div>
+        </div>
+    </body>
+</html>
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+            </pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="STDOUT" />
+    </root>
+</configuration>
@@ -0,0 +1,6 @@
+/* Text style for the tutorial paragraph block. */
+.myclass{
+  font-family: Helvetica, sans-serif;
+  /* A non-zero length needs a unit; a bare "25" is not a valid font-size value. */
+  font-size:25px;
+  font-weight: normal;
+  color: blue;
+}
@@ -0,0 +1,7 @@
+<html lang="en" xmlns:th="http://www.thymeleaf.org">
+  <body>
+    <h3 style="text-align: center; color: green">
+      <span th:text="'Welcome to ' + ${to} + '!'"></span>
+    </h3>
+  </body>
+</html>
@@ -0,0 +1,3 @@
+Test
+Text
+			Test	TEST
@@ -0,0 +1,84 @@
+package com.baeldung.pdf.base64;
+
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+
+import static org.junit.Assert.*;
+
+/**
+ * Round-trips {@code input.pdf} through Base64 encoding and decoding, using both
+ * {@code java.util.Base64} and Apache Commons Codec.
+ */
+public class EncodeDecodeUnitTest {
+
+    private static final String IN_FILE = "src/test/resources/input.pdf";
+    private static final String OUT_FILE = "src/test/resources/output.pdf";
+    private static byte[] inFileBytes;
+
+    /** Loads the input PDF once for all tests. */
+    @BeforeClass
+    public static void fileToByteArray() throws IOException {
+        inFileBytes = Files.readAllBytes(Paths.get(IN_FILE));
+    }
+
+    @Test
+    public void givenJavaBase64_whenEncoded_thenDecodedOK() throws IOException {
+
+        byte[] encoded = java.util.Base64.getEncoder().encode(inFileBytes);
+
+        byte[] decoded = java.util.Base64.getDecoder().decode(encoded);
+
+        writeToFile(OUT_FILE, decoded);
+
+        assertNotEquals(encoded.length, decoded.length);
+        assertEquals(inFileBytes.length, decoded.length);
+
+        // Expected value first, so a failure message reports the arrays the right way round.
+        assertArrayEquals(inFileBytes, decoded);
+
+    }
+
+    @Test
+    public void givenJavaBase64_whenEncodedStream_thenDecodedStreamOK() throws IOException {
+
+        // Stream the input through an encoding wrapper so OUT_FILE holds the Base64 text.
+        try (OutputStream os = java.util.Base64.getEncoder().wrap(new FileOutputStream(OUT_FILE));
+          FileInputStream fis = new FileInputStream(IN_FILE)) {
+            byte[] bytes = new byte[1024];
+            int read;
+            while ((read = fis.read(bytes)) > -1) {
+                os.write(bytes, 0, read);
+            }
+        }
+
+        byte[] encoded = java.util.Base64.getEncoder().encode(inFileBytes);
+        byte[] encodedOnDisk = Files.readAllBytes(Paths.get(OUT_FILE));
+        assertArrayEquals(encoded, encodedOnDisk);
+
+        byte[] decoded = java.util.Base64.getDecoder().decode(encoded);
+        byte[] decodedOnDisk = java.util.Base64.getDecoder().decode(encodedOnDisk);
+        assertArrayEquals(decoded, decodedOnDisk);
+    }
+
+    @Test
+    public void givenApacheCommons_givenJavaBase64_whenEncoded_thenDecodedOK() throws IOException {
+
+        byte[] encoded = org.apache.commons.codec.binary.Base64.encodeBase64(inFileBytes);
+
+        byte[] decoded = org.apache.commons.codec.binary.Base64.decodeBase64(encoded);
+
+        writeToFile(OUT_FILE, decoded);
+
+        assertNotEquals(encoded.length, decoded.length);
+        assertEquals(inFileBytes.length, decoded.length);
+
+        // Expected value first, so a failure message reports the arrays the right way round.
+        assertArrayEquals(inFileBytes, decoded);
+    }
+
+    /**
+     * Writes {@code bytes} to {@code fileName}, creating the file or replacing its content.
+     *
+     * @param fileName path of the file to write
+     * @param bytes    content to store
+     * @throws IOException if the file cannot be written
+     */
+    private void writeToFile(String fileName, byte[] bytes) throws IOException {
+        // Files.write opens, writes and closes in one call, so no handle leaks on failure.
+        Files.write(Paths.get(fileName), bytes);
+    }
+}
@@ -0,0 +1,52 @@
+package com.baeldung.pdfreadertest;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.junit.jupiter.api.Test;
+
+import com.itextpdf.text.pdf.PdfReader;
+import com.itextpdf.text.pdf.parser.PdfTextExtractor;
+
+/**
+ * Extracts the text of {@code sample.pdf} with Apache PDFBox and with iText and compares it
+ * against the expected content.
+ */
+class ReadPdfFileUnitTest {
+
+    @Test
+    public void givenSamplePdf_whenUsingApachePdfBox_thenCompareOutput() throws IOException {
+        String expectedText = "Hello World!\n";
+
+        File file = new File("sample.pdf");
+        String text;
+
+        // try-with-resources releases the document even when text extraction throws.
+        try (PDDocument document = PDDocument.load(file)) {
+            PDFTextStripper stripper = new PDFTextStripper();
+            text = stripper.getText(document);
+        }
+
+        assertEquals(expectedText, text);
+
+    }
+
+    @Test
+    public void givenSamplePdf_whenUsingiTextPdf_thenCompareOutput() throws IOException {
+        String expectedText = "Hello World!";
+
+        PdfReader reader = new PdfReader("sample.pdf");
+        StringBuilder text = new StringBuilder();
+
+        // Close the reader in finally so a failed extraction does not leave the file open.
+        try {
+            int pages = reader.getNumberOfPages();
+
+            // Pages are numbered from 1 in this loop.
+            for (int i = 1; i <= pages; i++) {
+                text.append(PdfTextExtractor.getTextFromPage(reader, i));
+            }
+        } finally {
+            reader.close();
+        }
+
+        assertEquals(expectedText, text.toString());
+
+    }
+
+}
@@ -0,0 +1,52 @@
+package com.baeldung.pdfthymeleaf;
+
+import com.lowagie.text.DocumentException;
+import org.junit.Test;
+import org.thymeleaf.TemplateEngine;
+import org.thymeleaf.context.Context;
+import org.thymeleaf.templatemode.TemplateMode;
+import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
+import org.xhtmlrenderer.pdf.ITextRenderer;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Checks that the {@code thymeleaf_template} template can be rendered to a non-empty PDF.
+ */
+public class PDFThymeleafUnitTest {
+
+    @Test
+    public void givenThymeleafTemplate_whenParsedAndRenderedToPDF_thenItShouldNotBeEmpty() throws DocumentException, IOException {
+        String renderedHtml = parseThymeleafTemplate();
+
+        ByteArrayOutputStream pdfBytes = generatePdfOutputStreamFromHtml(renderedHtml);
+
+        assertTrue(pdfBytes.size() > 0);
+    }
+
+    /**
+     * Renders the markup into an in-memory stream.
+     *
+     * @param html markup to render
+     * @return the (already closed) stream holding the PDF bytes
+     */
+    private ByteArrayOutputStream generatePdfOutputStreamFromHtml(String html) throws IOException, DocumentException {
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+        ITextRenderer pdfRenderer = new ITextRenderer();
+
+        pdfRenderer.setDocumentFromString(html);
+        pdfRenderer.layout();
+        pdfRenderer.createPDF(buffer);
+        buffer.close();
+
+        return buffer;
+    }
+
+    /**
+     * Processes the {@code thymeleaf_template} template (resolved with an {@code .html} suffix
+     * through the class loader) with the variable {@code to} set to {@code "Baeldung.com"}.
+     *
+     * @return the rendered HTML
+     */
+    private String parseThymeleafTemplate() {
+        ClassLoaderTemplateResolver resolver = new ClassLoaderTemplateResolver();
+        resolver.setSuffix(".html");
+        resolver.setTemplateMode(TemplateMode.HTML);
+
+        TemplateEngine engine = new TemplateEngine();
+        engine.setTemplateResolver(resolver);
+
+        Context variables = new Context();
+        variables.setVariable("to", "Baeldung.com");
+
+        String rendered = engine.process("thymeleaf_template", variables);
+        return rendered;
+    }
+}
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <artifactId>text-processing-libraries-modules</artifactId>
+    <name>text-processing-libraries-modules</name>
+    <packaging>pom</packaging>
+
+    <parent>
+        <artifactId>parent-modules</artifactId>
+        <groupId>com.baeldung</groupId>
+        <version>1.0.0-SNAPSHOT</version>
+    </parent>
+
+    <modules>
+        <module>antlr</module>
+        <module>apache-tika</module>
+        <module>asciidoctor</module>
+        <module>pdf</module>
+        <module>pdf-2</module>
+    </modules>
+
+</project>