JAVA-29281 Create new module Text Processing Libraries Modules (#15479)
This commit is contained in:
@@ -0,0 +1,3 @@
|
||||
## Text Processing Libraries
|
||||
|
||||
This module contains modules about Text Processing Libraries.
|
||||
@@ -0,0 +1,7 @@
|
||||
## ANTLR
|
||||
|
||||
This module contains articles about ANTLR
|
||||
|
||||
### Relevant Articles:
|
||||
|
||||
- [Java with ANTLR](https://www.baeldung.com/java-antlr)
|
||||
@@ -0,0 +1,63 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>antlr</artifactId>
|
||||
<name>antlr</name>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>text-processing-libraries-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-runtime</artifactId>
|
||||
<version>${antlr.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.antlr</groupId>
|
||||
<artifactId>antlr4-maven-plugin</artifactId>
|
||||
<version>${antlr.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<goals>
|
||||
<goal>antlr4</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.codehaus.mojo</groupId>
|
||||
<artifactId>build-helper-maven-plugin</artifactId>
|
||||
<version>${mojo.version}</version>
|
||||
<executions>
|
||||
<execution>
|
||||
<phase>generate-sources</phase>
|
||||
<goals>
|
||||
<goal>add-source</goal>
|
||||
</goals>
|
||||
<configuration>
|
||||
<sources>
|
||||
<source>${basedir}/target/generated-sources/antlr4</source>
|
||||
</sources>
|
||||
</configuration>
|
||||
</execution>
|
||||
</executions>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<properties>
|
||||
<antlr.version>4.7.1</antlr.version>
|
||||
<mojo.version>3.0.0</mojo.version>
|
||||
</properties>
|
||||
|
||||
</project>
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,16 @@
|
||||
// Grammar for line-oriented logs of the form:
//   <date> <time> <LEVEL> <message><line break>
grammar Log;

// ---- Parser rules ----

// A log consists of one or more entries.
log
    : entry+
    ;

// One entry: timestamp, level and message separated by single spaces,
// terminated by a line break.
entry
    : timestamp ' ' level ' ' message CRLF
    ;

// Date and time separated by a single space.
timestamp
    : DATE ' ' TIME
    ;

// The three severity keywords accepted by the grammar.
level
    : 'ERROR'
    | 'INFO'
    | 'DEBUG'
    ;

// Free text: any mix of letter-only words and spaces.
message
    : (TEXT | ' ')+
    ;

// ---- Lexer fragments (building blocks, never emitted as tokens) ----

fragment DIGIT
    : [0-9]
    ;

fragment TWODIGIT
    : DIGIT DIGIT
    ;

fragment LETTER
    : [A-Za-z]
    ;

// ---- Lexer tokens ----

// Four digits, three letters, two digits, dash-separated (e.g. 2018-May-05).
DATE
    : TWODIGIT TWODIGIT '-' LETTER LETTER LETTER '-' TWODIGIT
    ;

// Two-digit hours, minutes and seconds separated by colons.
TIME
    : TWODIGIT ':' TWODIGIT ':' TWODIGIT
    ;

// A run of one or more letters.
TEXT
    : LETTER+
    ;

// Any of the three common line endings.
CRLF
    : '\r'? '\n'
    | '\r'
    ;
|
||||
+28
@@ -0,0 +1,28 @@
|
||||
package com.baeldung.antlr.java;
|
||||
|
||||
import com.baeldung.antlr.Java8BaseListener;
|
||||
import com.baeldung.antlr.Java8Parser;
|
||||
import org.antlr.v4.runtime.tree.TerminalNode;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
public class UppercaseMethodListener extends Java8BaseListener {
|
||||
|
||||
private List<String> errors = new ArrayList<String>();
|
||||
|
||||
@Override
|
||||
public void enterMethodDeclarator(Java8Parser.MethodDeclaratorContext ctx) {
|
||||
TerminalNode node = ctx.Identifier();
|
||||
String methodName = node.getText();
|
||||
|
||||
if (Character.isUpperCase(methodName.charAt(0))){
|
||||
errors.add(String.format("Method %s is uppercased!", methodName));
|
||||
}
|
||||
}
|
||||
|
||||
public List<String> getErrors(){
|
||||
return Collections.unmodifiableList(errors);
|
||||
}
|
||||
}
|
||||
+51
@@ -0,0 +1,51 @@
|
||||
package com.baeldung.antlr.log;
|
||||
|
||||
import com.baeldung.antlr.LogBaseListener;
|
||||
import com.baeldung.antlr.LogParser;
|
||||
import com.baeldung.antlr.log.model.LogLevel;
|
||||
import com.baeldung.antlr.log.model.LogEntry;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
|
||||
public class LogListener extends LogBaseListener {
|
||||
|
||||
private static final DateTimeFormatter DEFAULT_DATETIME_FORMATTER
|
||||
= DateTimeFormatter.ofPattern("yyyy-MMM-dd HH:mm:ss", Locale.ENGLISH);
|
||||
|
||||
private List<LogEntry> entries = new ArrayList<>();
|
||||
private LogEntry currentLogEntry;
|
||||
|
||||
@Override
|
||||
public void enterEntry(LogParser.EntryContext ctx) {
|
||||
this.currentLogEntry = new LogEntry();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void exitEntry(LogParser.EntryContext ctx) {
|
||||
entries.add(currentLogEntry);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void enterTimestamp(LogParser.TimestampContext ctx) {
|
||||
currentLogEntry.setTimestamp(LocalDateTime.parse(ctx.getText(), DEFAULT_DATETIME_FORMATTER));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void enterMessage(LogParser.MessageContext ctx) {
|
||||
currentLogEntry.setMessage(ctx.getText());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void enterLevel(LogParser.LevelContext ctx) {
|
||||
currentLogEntry.setLevel(LogLevel.valueOf(ctx.getText()));
|
||||
}
|
||||
|
||||
public List<LogEntry> getEntries() {
|
||||
return Collections.unmodifiableList(entries);
|
||||
}
|
||||
}
|
||||
+35
@@ -0,0 +1,35 @@
|
||||
package com.baeldung.antlr.log.model;
|
||||
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
public class LogEntry {
|
||||
|
||||
private LogLevel level;
|
||||
private String message;
|
||||
private LocalDateTime timestamp;
|
||||
|
||||
public LogLevel getLevel() {
|
||||
return level;
|
||||
}
|
||||
|
||||
public void setLevel(LogLevel level) {
|
||||
this.level = level;
|
||||
}
|
||||
|
||||
public String getMessage() {
|
||||
return message;
|
||||
}
|
||||
|
||||
public void setMessage(String message) {
|
||||
this.message = message;
|
||||
}
|
||||
|
||||
public LocalDateTime getTimestamp() {
|
||||
return timestamp;
|
||||
}
|
||||
|
||||
public void setTimestamp(LocalDateTime timestamp) {
|
||||
this.timestamp = timestamp;
|
||||
}
|
||||
}
|
||||
+5
@@ -0,0 +1,5 @@
|
||||
package com.baeldung.antlr.log.model;
|
||||
|
||||
/** Severity levels that a log entry can carry. */
public enum LogLevel {
    DEBUG,
    INFO,
    ERROR
}
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
|
||||
</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="INFO">
|
||||
<appender-ref ref="STDOUT" />
|
||||
</root>
|
||||
</configuration>
|
||||
+30
@@ -0,0 +1,30 @@
|
||||
package com.baeldung.antlr;
|
||||
|
||||
import com.baeldung.antlr.java.UppercaseMethodListener;
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.tree.ParseTree;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
||||
import org.junit.Test;
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
|
||||
public class JavaParserUnitTest {
|
||||
|
||||
@Test
|
||||
public void whenOneMethodStartsWithUpperCase_thenOneErrorReturned() throws Exception{
|
||||
|
||||
String javaClassContent = "public class SampleClass { void DoSomething(){} }";
|
||||
Java8Lexer java8Lexer = new Java8Lexer(CharStreams.fromString(javaClassContent));
|
||||
CommonTokenStream tokens = new CommonTokenStream(java8Lexer);
|
||||
Java8Parser java8Parser = new Java8Parser(tokens);
|
||||
ParseTree tree = java8Parser.compilationUnit();
|
||||
ParseTreeWalker walker = new ParseTreeWalker();
|
||||
UppercaseMethodListener uppercaseMethodListener = new UppercaseMethodListener();
|
||||
walker.walk(uppercaseMethodListener, tree);
|
||||
|
||||
assertThat(uppercaseMethodListener.getErrors().size(), is(1));
|
||||
assertThat(uppercaseMethodListener.getErrors().get(0),
|
||||
is("Method DoSomething is uppercased!"));
|
||||
}
|
||||
}
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
package com.baeldung.antlr;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.is;
|
||||
import static org.hamcrest.MatcherAssert.assertThat;
|
||||
|
||||
import com.baeldung.antlr.log.LogListener;
|
||||
import com.baeldung.antlr.log.model.LogLevel;
|
||||
import com.baeldung.antlr.log.model.LogEntry;
|
||||
import org.antlr.v4.runtime.CharStreams;
|
||||
import org.antlr.v4.runtime.CommonTokenStream;
|
||||
import org.antlr.v4.runtime.tree.ParseTreeWalker;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
|
||||
public class LogParserUnitTest {
|
||||
|
||||
@Test
|
||||
public void whenLogContainsOneErrorLogEntry_thenOneErrorIsReturned() throws Exception {
|
||||
String logLines = "2018-May-05 14:20:21 DEBUG entering awesome method\r\n" +
|
||||
"2018-May-05 14:20:24 ERROR Bad thing happened\r\n";
|
||||
LogLexer serverLogLexer = new LogLexer(CharStreams.fromString(logLines));
|
||||
CommonTokenStream tokens = new CommonTokenStream( serverLogLexer );
|
||||
LogParser logParser = new LogParser(tokens);
|
||||
ParseTreeWalker walker = new ParseTreeWalker();
|
||||
LogListener logWalker = new LogListener();
|
||||
walker.walk(logWalker, logParser.log());
|
||||
|
||||
assertThat(logWalker.getEntries().size(), is(2));
|
||||
LogEntry error = logWalker.getEntries().get(1);
|
||||
assertThat(error.getLevel(), is(LogLevel.ERROR));
|
||||
assertThat(error.getMessage(), is("Bad thing happened"));
|
||||
assertThat(error.getTimestamp(), is(LocalDateTime.of(2018,5,5,14,20,24)));
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,7 @@
|
||||
## Apache Tika
|
||||
|
||||
This module contains articles about Apache Tika
|
||||
|
||||
### Relevant articles:
|
||||
|
||||
- [Content Analysis with Apache Tika](https://www.baeldung.com/apache-tika)
|
||||
@@ -0,0 +1,28 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>apache-tika</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<name>apache-tika</name>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>text-processing-libraries-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.tika</groupId>
|
||||
<artifactId>tika-parsers</artifactId>
|
||||
<version>${tika.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<properties>
|
||||
<tika.version>1.17</tika.version>
|
||||
</properties>
|
||||
|
||||
</project>
|
||||
+67
@@ -0,0 +1,67 @@
|
||||
package com.baeldung.tika;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.tika.Tika;
|
||||
import org.apache.tika.detect.DefaultDetector;
|
||||
import org.apache.tika.detect.Detector;
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.apache.tika.mime.MediaType;
|
||||
import org.apache.tika.parser.AutoDetectParser;
|
||||
import org.apache.tika.parser.ParseContext;
|
||||
import org.apache.tika.parser.Parser;
|
||||
import org.apache.tika.sax.BodyContentHandler;
|
||||
import org.xml.sax.ContentHandler;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class TikaAnalysis {
|
||||
public static String detectDocTypeUsingDetector(InputStream stream) throws IOException {
|
||||
Detector detector = new DefaultDetector();
|
||||
Metadata metadata = new Metadata();
|
||||
|
||||
MediaType mediaType = detector.detect(stream, metadata);
|
||||
return mediaType.toString();
|
||||
}
|
||||
|
||||
public static String detectDocTypeUsingFacade(InputStream stream) throws IOException {
|
||||
Tika tika = new Tika();
|
||||
String mediaType = tika.detect(stream);
|
||||
return mediaType;
|
||||
}
|
||||
|
||||
public static String extractContentUsingParser(InputStream stream) throws IOException, TikaException, SAXException {
|
||||
Parser parser = new AutoDetectParser();
|
||||
ContentHandler handler = new BodyContentHandler();
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = new ParseContext();
|
||||
|
||||
parser.parse(stream, handler, metadata, context);
|
||||
return handler.toString();
|
||||
}
|
||||
|
||||
public static String extractContentUsingFacade(InputStream stream) throws IOException, TikaException {
|
||||
Tika tika = new Tika();
|
||||
String content = tika.parseToString(stream);
|
||||
return content;
|
||||
}
|
||||
|
||||
public static Metadata extractMetadatatUsingParser(InputStream stream) throws IOException, SAXException, TikaException {
|
||||
Parser parser = new AutoDetectParser();
|
||||
ContentHandler handler = new BodyContentHandler();
|
||||
Metadata metadata = new Metadata();
|
||||
ParseContext context = new ParseContext();
|
||||
|
||||
parser.parse(stream, handler, metadata, context);
|
||||
return metadata;
|
||||
}
|
||||
|
||||
public static Metadata extractMetadatatUsingFacade(InputStream stream) throws IOException, TikaException {
|
||||
Tika tika = new Tika();
|
||||
Metadata metadata = new Metadata();
|
||||
|
||||
tika.parse(stream, metadata);
|
||||
return metadata;
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
|
||||
</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="INFO">
|
||||
<appender-ref ref="STDOUT" />
|
||||
</root>
|
||||
</configuration>
|
||||
+79
@@ -0,0 +1,79 @@
|
||||
package com.baeldung.tika;
|
||||
|
||||
import static org.hamcrest.CoreMatchers.containsString;
|
||||
import static org.junit.Assert.assertEquals;
|
||||
import static org.junit.Assert.assertThat;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.tika.exception.TikaException;
|
||||
import org.apache.tika.metadata.Metadata;
|
||||
import org.junit.Test;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
public class TikaUnitTest {
|
||||
@Test
|
||||
public void whenUsingDetector_thenDocumentTypeIsReturned() throws IOException {
|
||||
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.txt");
|
||||
String mediaType = TikaAnalysis.detectDocTypeUsingDetector(stream);
|
||||
|
||||
assertEquals("application/pdf", mediaType);
|
||||
|
||||
stream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenUsingFacade_thenDocumentTypeIsReturned() throws IOException {
|
||||
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.txt");
|
||||
String mediaType = TikaAnalysis.detectDocTypeUsingFacade(stream);
|
||||
|
||||
assertEquals("application/pdf", mediaType);
|
||||
|
||||
stream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenUsingParser_thenContentIsReturned() throws IOException, TikaException, SAXException {
|
||||
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.docx");
|
||||
String content = TikaAnalysis.extractContentUsingParser(stream);
|
||||
|
||||
assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
|
||||
assertThat(content, containsString("detects and extracts metadata and text"));
|
||||
|
||||
stream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenUsingFacade_thenContentIsReturned() throws IOException, TikaException {
|
||||
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.docx");
|
||||
String content = TikaAnalysis.extractContentUsingFacade(stream);
|
||||
|
||||
assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
|
||||
assertThat(content, containsString("detects and extracts metadata and text"));
|
||||
|
||||
stream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenUsingParser_thenMetadataIsReturned() throws IOException, TikaException, SAXException {
|
||||
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.xlsx");
|
||||
Metadata metadata = TikaAnalysis.extractMetadatatUsingParser(stream);
|
||||
|
||||
assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
|
||||
assertEquals("Microsoft Office User", metadata.get("Author"));
|
||||
|
||||
stream.close();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void whenUsingFacade_thenMetadataIsReturned() throws IOException, TikaException {
|
||||
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.xlsx");
|
||||
Metadata metadata = TikaAnalysis.extractMetadatatUsingFacade(stream);
|
||||
|
||||
assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
|
||||
assertEquals("Microsoft Office User", metadata.get("Author"));
|
||||
|
||||
stream.close();
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
## Asciidoctor
|
||||
|
||||
This module contains articles about Asciidoctor
|
||||
|
||||
### Relevant articles:
|
||||
|
||||
- [Generating a Book with Asciidoctor](https://www.baeldung.com/asciidoctor-book)
|
||||
- [Introduction to Asciidoctor in Java](https://www.baeldung.com/asciidoctor)
|
||||
@@ -0,0 +1,71 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>asciidoctor</artifactId>
|
||||
<name>asciidoctor</name>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>text-processing-libraries-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.asciidoctor</groupId>
|
||||
<artifactId>asciidoctorj</artifactId>
|
||||
<version>${asciidoctorj.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.asciidoctor</groupId>
|
||||
<artifactId>asciidoctorj-pdf</artifactId>
|
||||
<version>${asciidoctorj-pdf.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.asciidoctor</groupId>
|
||||
<artifactId>asciidoctor-maven-plugin</artifactId>
|
||||
<version>${asciidoctor-maven-plugin.version}</version>
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.asciidoctor</groupId>
|
||||
<artifactId>asciidoctorj-pdf</artifactId>
|
||||
<version>${asciidoctorj-pdf.plugin.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
<executions>
|
||||
<execution>
|
||||
<id>output-pdf</id>
|
||||
<phase>generate-resources</phase>
|
||||
<goals>
|
||||
<goal>process-asciidoc</goal>
|
||||
</goals>
|
||||
</execution>
|
||||
</executions>
|
||||
<configuration>
|
||||
<sourceDirectory>src/docs/asciidoc</sourceDirectory>
|
||||
<outputDirectory>target/docs/asciidoc</outputDirectory>
|
||||
<attributes>
|
||||
<pdf-stylesdir>${project.basedir}/src/themes</pdf-stylesdir>
|
||||
<pdf-style>custom</pdf-style>
|
||||
</attributes>
|
||||
<backend>pdf</backend>
|
||||
<doctype>book</doctype>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<properties>
|
||||
<asciidoctor-maven-plugin.version>2.2.2</asciidoctor-maven-plugin.version>
|
||||
<asciidoctorj.version>2.5.7</asciidoctorj.version>
|
||||
<asciidoctorj-pdf.version>2.3.4</asciidoctorj-pdf.version>
|
||||
<asciidoctorj-pdf.plugin.version>2.3.4</asciidoctorj-pdf.plugin.version>
|
||||
</properties>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,13 @@
|
||||
:icons: font
|
||||
|
||||
|
||||
= Generating book with AsciiDoctorj
|
||||
Baeldung
|
||||
|
||||
[abstract]
|
||||
This is the actual content.
|
||||
|
||||
== First Section
|
||||
|
||||
This is the first section of the book where you can include some nice icons like icon:comment[].
|
||||
You can also create http://www.baeldung.com[links]
|
||||
+33
@@ -0,0 +1,33 @@
|
||||
package com.baeldung.asciidoctor;
|
||||
|
||||
import static org.asciidoctor.Asciidoctor.Factory.create;
|
||||
import static org.asciidoctor.OptionsBuilder.options;
|
||||
|
||||
import java.io.File;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.asciidoctor.Asciidoctor;
|
||||
|
||||
public class AsciidoctorDemo {
|
||||
|
||||
private final Asciidoctor asciidoctor;
|
||||
|
||||
AsciidoctorDemo() {
|
||||
asciidoctor = create();
|
||||
}
|
||||
|
||||
public void generatePDFFromString(final String input) {
|
||||
|
||||
final Map<String, Object> options = options().inPlace(true)
|
||||
.backend("pdf")
|
||||
.asMap();
|
||||
|
||||
|
||||
final String outfile = asciidoctor.convertFile(new File("sample.adoc"), options);
|
||||
}
|
||||
|
||||
String generateHTMLFromString(final String input) {
|
||||
return asciidoctor.convert("Hello _Baeldung_!", new HashMap<String, Object>());
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
|
||||
</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="INFO">
|
||||
<appender-ref ref="STDOUT" />
|
||||
</root>
|
||||
</configuration>
|
||||
+13
@@ -0,0 +1,13 @@
|
||||
package com.baeldung.asciidoctor;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
public class AsciidoctorDemoIntegrationTest {
|
||||
|
||||
@Test
|
||||
public void givenString_whenConverting_thenResultingHTMLCode() {
|
||||
final AsciidoctorDemo asciidoctorDemo = new AsciidoctorDemo();
|
||||
Assert.assertEquals(asciidoctorDemo.generateHTMLFromString("Hello _Baeldung_!"), "<div class=\"paragraph\">\n<p>Hello <em>Baeldung</em>!</p>\n</div>");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
title_page:
|
||||
align: left
|
||||
|
||||
page:
|
||||
layout: portrait
|
||||
margin: [0.75in, 1in, 0.75in, 1in]
|
||||
size: A4
|
||||
base:
|
||||
font_color: #333333
|
||||
line_height_length: 17
|
||||
line_height: $base_line_height_length / $base_font_size
|
||||
link:
|
||||
font_color: #009900
|
||||
|
||||
header:
|
||||
height: 0.5in
|
||||
line_height: 1
|
||||
recto_content:
|
||||
center: '{document-title}'
|
||||
verso_content:
|
||||
center: '{document-title}'
|
||||
|
||||
footer:
|
||||
height: 0.5in
|
||||
line_height: 1
|
||||
recto_content:
|
||||
right: '{chapter-title} | *{page-number}*'
|
||||
verso_content:
|
||||
left: '*{page-number}* | {chapter-title}'
|
||||
@@ -0,0 +1,3 @@
|
||||
## Relevant articles
|
||||
- [Editing Existing PDF Files in Java](https://www.baeldung.com/java-edit-existing-pdf)
|
||||
- [Get Information About a PDF in Java](https://www.baeldung.com/java-pdf-info)
|
||||
@@ -0,0 +1,75 @@
|
||||
<?xml version="1.0"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>pdf-2</artifactId>
|
||||
<name>pdf-2</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>text-processing-libraries-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>com.itextpdf</groupId>
|
||||
<artifactId>itext7-core</artifactId>
|
||||
<version>${itextpdf.core.version}</version>
|
||||
<type>pom</type>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.itextpdf</groupId>
|
||||
<artifactId>cleanup</artifactId>
|
||||
<version>${itextpdf.cleanup.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.itextpdf</groupId>
|
||||
<artifactId>itextpdf</artifactId>
|
||||
<version>${itextpdf.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>${pdfbox.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>${poi-ooxml.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-api</artifactId>
|
||||
<version>${log4j-api.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.logging.log4j</groupId>
|
||||
<artifactId>log4j-core</artifactId>
|
||||
<version>${log4j-core.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<finalName>pdf-2</finalName>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
<filtering>true</filtering>
|
||||
</resource>
|
||||
</resources>
|
||||
</build>
|
||||
|
||||
<properties>
|
||||
<itextpdf.version>5.5.13.3</itextpdf.version>
|
||||
<itextpdf.core.version>7.2.3</itextpdf.core.version>
|
||||
<itextpdf.cleanup.version>3.0.1</itextpdf.cleanup.version>
|
||||
<pdfbox.version>3.0.0</pdfbox.version>
|
||||
<poi-ooxml.version>5.2.5</poi-ooxml.version>
|
||||
<log4j-api.version>2.20.0</log4j-api.version>
|
||||
<log4j-core.version>2.20.0</log4j-core.version>
|
||||
</properties>
|
||||
|
||||
</project>
|
||||
+242
@@ -0,0 +1,242 @@
|
||||
package com.baeldung.exceltopdf;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.math.BigDecimal;
|
||||
import java.util.Iterator;
|
||||
|
||||
import org.apache.poi.ss.usermodel.CellStyle;
|
||||
import org.apache.poi.ss.usermodel.HorizontalAlignment;
|
||||
import org.apache.poi.ss.usermodel.IndexedColors;
|
||||
import org.apache.poi.ss.usermodel.VerticalAlignment;
|
||||
import org.apache.poi.xssf.usermodel.XSSFColor;
|
||||
import org.apache.poi.xssf.usermodel.XSSFFont;
|
||||
import org.apache.poi.xssf.usermodel.XSSFSheet;
|
||||
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
|
||||
import org.apache.poi.ss.usermodel.CellType;
|
||||
import org.apache.poi.ss.usermodel.Row;
|
||||
import org.apache.poi.ss.usermodel.Cell;
|
||||
|
||||
import com.itextpdf.text.BaseColor;
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Element;
|
||||
import com.itextpdf.text.Font;
|
||||
import com.itextpdf.text.FontFactory;
|
||||
import com.itextpdf.text.Paragraph;
|
||||
import com.itextpdf.text.Phrase;
|
||||
import com.itextpdf.text.pdf.PdfPCell;
|
||||
import com.itextpdf.text.pdf.PdfPTable;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
public class ExcelToPDFConverter {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(ExcelToPDFConverter.class);
|
||||
|
||||
public static XSSFWorkbook readExcelFile(String excelFilePath) throws IOException {
|
||||
FileInputStream inputStream = new FileInputStream(excelFilePath);
|
||||
XSSFWorkbook workbook = new XSSFWorkbook(inputStream);
|
||||
inputStream.close();
|
||||
return workbook;
|
||||
}
|
||||
|
||||
private static Document createPDFDocument(String pdfFilePath) throws IOException, DocumentException {
|
||||
Document document = new Document();
|
||||
PdfWriter.getInstance(document, new FileOutputStream(pdfFilePath));
|
||||
document.open();
|
||||
return document;
|
||||
}
|
||||
|
||||
public static void convertExcelToPDF(String excelFilePath, String pdfFilePath) throws IOException, DocumentException {
|
||||
XSSFWorkbook workbook = readExcelFile(excelFilePath);
|
||||
Document document = createPDFDocument(pdfFilePath);
|
||||
|
||||
for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
|
||||
XSSFSheet worksheet = workbook.getSheetAt(i);
|
||||
|
||||
// Add header with sheet name as title
|
||||
Paragraph title = new Paragraph(worksheet.getSheetName(), new Font(Font.FontFamily.HELVETICA, 18, Font.BOLD));
|
||||
title.setSpacingAfter(20f);
|
||||
title.setAlignment(Element.ALIGN_CENTER);
|
||||
document.add(title);
|
||||
|
||||
createAndAddTable(worksheet, document);
|
||||
// Add a new page for each sheet (except the last one)
|
||||
if (i < workbook.getNumberOfSheets() - 1) {
|
||||
document.newPage();
|
||||
}
|
||||
}
|
||||
|
||||
document.close();
|
||||
workbook.close();
|
||||
}
|
||||
|
||||
private static void createAndAddTable(XSSFSheet worksheet, Document document) throws DocumentException, IOException {
|
||||
PdfPTable table = new PdfPTable(worksheet.getRow(0)
|
||||
.getPhysicalNumberOfCells());
|
||||
table.setWidthPercentage(100);
|
||||
addTableHeader(worksheet, table);
|
||||
addTableData(worksheet, table);
|
||||
document.add(table);
|
||||
}
|
||||
|
||||
private static void addTableHeader(XSSFSheet worksheet, PdfPTable table) throws DocumentException, IOException {
|
||||
Row headerRow = worksheet.getRow(0);
|
||||
for (int i = 0; i < headerRow.getPhysicalNumberOfCells(); i++) {
|
||||
Cell cell = headerRow.getCell(i);
|
||||
String headerText = getCellText(cell);
|
||||
PdfPCell headerCell = new PdfPCell(new Phrase(headerText, getCellStyle(cell)));
|
||||
setBackgroundColor(cell, headerCell);
|
||||
setCellAlignment(cell, headerCell);
|
||||
table.addCell(headerCell);
|
||||
}
|
||||
}
|
||||
|
||||
public static String getCellText(Cell cell) {
|
||||
String cellValue;
|
||||
switch (cell.getCellType()) {
|
||||
case STRING:
|
||||
cellValue = cell.getStringCellValue();
|
||||
break;
|
||||
case NUMERIC:
|
||||
cellValue = String.valueOf(BigDecimal.valueOf(cell.getNumericCellValue()));
|
||||
break;
|
||||
case BLANK:
|
||||
default:
|
||||
cellValue = "";
|
||||
break;
|
||||
}
|
||||
return cellValue;
|
||||
}
|
||||
|
||||
private static void addTableData(XSSFSheet worksheet, PdfPTable table) throws DocumentException, IOException {
|
||||
Iterator<Row> rowIterator = worksheet.iterator();
|
||||
while (rowIterator.hasNext()) {
|
||||
Row row = rowIterator.next();
|
||||
if (row.getRowNum() == 0) {
|
||||
continue;
|
||||
}
|
||||
for (int i = 0; i < row.getPhysicalNumberOfCells(); i++) {
|
||||
Cell cell = row.getCell(i);
|
||||
String cellValue = getCellText(cell);
|
||||
PdfPCell cellPdf = new PdfPCell(new Phrase(cellValue, getCellStyle(cell)));
|
||||
setBackgroundColor(cell, cellPdf);
|
||||
setCellAlignment(cell, cellPdf);
|
||||
table.addCell(cellPdf);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void setBackgroundColor(Cell cell, PdfPCell cellPdf) {
|
||||
// Set background color
|
||||
short bgColorIndex = cell.getCellStyle()
|
||||
.getFillForegroundColor();
|
||||
if (bgColorIndex != IndexedColors.AUTOMATIC.getIndex()) {
|
||||
XSSFColor bgColor = (XSSFColor) cell.getCellStyle()
|
||||
.getFillForegroundColorColor();
|
||||
if (bgColor != null) {
|
||||
byte[] rgb = bgColor.getRGB();
|
||||
if (rgb != null && rgb.length == 3) {
|
||||
cellPdf.setBackgroundColor(new BaseColor(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void setCellAlignment(Cell cell, PdfPCell cellPdf) {
|
||||
CellStyle cellStyle = cell.getCellStyle();
|
||||
|
||||
HorizontalAlignment horizontalAlignment = cellStyle.getAlignment();
|
||||
VerticalAlignment verticalAlignment = cellStyle.getVerticalAlignment();
|
||||
|
||||
switch (horizontalAlignment) {
|
||||
case LEFT:
|
||||
cellPdf.setHorizontalAlignment(Element.ALIGN_LEFT);
|
||||
break;
|
||||
case CENTER:
|
||||
cellPdf.setHorizontalAlignment(Element.ALIGN_CENTER);
|
||||
break;
|
||||
case JUSTIFY:
|
||||
case FILL:
|
||||
cellPdf.setVerticalAlignment(Element.ALIGN_JUSTIFIED);
|
||||
break;
|
||||
case RIGHT:
|
||||
cellPdf.setHorizontalAlignment(Element.ALIGN_RIGHT);
|
||||
break;
|
||||
}
|
||||
|
||||
switch (verticalAlignment) {
|
||||
case TOP:
|
||||
cellPdf.setVerticalAlignment(Element.ALIGN_TOP);
|
||||
break;
|
||||
case CENTER:
|
||||
cellPdf.setVerticalAlignment(Element.ALIGN_MIDDLE);
|
||||
break;
|
||||
case JUSTIFY:
|
||||
cellPdf.setVerticalAlignment(Element.ALIGN_JUSTIFIED);
|
||||
break;
|
||||
case BOTTOM:
|
||||
cellPdf.setVerticalAlignment(Element.ALIGN_BOTTOM);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
private static Font getCellStyle(Cell cell) throws DocumentException, IOException {
|
||||
Font font = new Font();
|
||||
CellStyle cellStyle = cell.getCellStyle();
|
||||
org.apache.poi.ss.usermodel.Font cellFont = cell.getSheet()
|
||||
.getWorkbook()
|
||||
.getFontAt(cellStyle.getFontIndexAsInt());
|
||||
|
||||
short fontColorIndex = cellFont.getColor();
|
||||
if (fontColorIndex != IndexedColors.AUTOMATIC.getIndex() && cellFont instanceof XSSFFont) {
|
||||
XSSFColor fontColor = ((XSSFFont) cellFont).getXSSFColor();
|
||||
if (fontColor != null) {
|
||||
byte[] rgb = fontColor.getRGB();
|
||||
if (rgb != null && rgb.length == 3) {
|
||||
font.setColor(new BaseColor(rgb[0] & 0xFF, rgb[1] & 0xFF, rgb[2] & 0xFF));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (cellFont.getItalic()) {
|
||||
font.setStyle(Font.ITALIC);
|
||||
}
|
||||
|
||||
if (cellFont.getStrikeout()) {
|
||||
font.setStyle(Font.STRIKETHRU);
|
||||
}
|
||||
|
||||
if (cellFont.getUnderline() == 1) {
|
||||
font.setStyle(Font.UNDERLINE);
|
||||
}
|
||||
|
||||
short fontSize = cellFont.getFontHeightInPoints();
|
||||
font.setSize(fontSize);
|
||||
|
||||
if (cellFont.getBold()) {
|
||||
font.setStyle(Font.BOLD);
|
||||
}
|
||||
|
||||
String fontName = cellFont.getFontName();
|
||||
if (FontFactory.isRegistered(fontName)) {
|
||||
font.setFamily(fontName); // Use extracted font family if supported by iText
|
||||
} else {
|
||||
logger.warn("Unsupported font type: {}", fontName);
|
||||
// - Use a fallback font (e.g., Helvetica)
|
||||
font.setFamily("Helvetica");
|
||||
}
|
||||
|
||||
return font;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws DocumentException, IOException {
|
||||
String excelFilePath = "src/main/resources/excelsample.xlsx";
|
||||
String pdfFilePath = "src/main/resources/pdfsample.pdf";
|
||||
convertExcelToPDF(excelFilePath, pdfFilePath);
|
||||
}
|
||||
}
|
||||
+43
@@ -0,0 +1,43 @@
|
||||
package com.baeldung.pdfedition;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import com.itextpdf.kernel.geom.Rectangle;
|
||||
import com.itextpdf.kernel.pdf.PdfDocument;
|
||||
import com.itextpdf.kernel.pdf.PdfReader;
|
||||
import com.itextpdf.kernel.pdf.PdfWriter;
|
||||
import com.itextpdf.pdfcleanup.CleanUpProperties;
|
||||
import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
|
||||
import com.itextpdf.pdfcleanup.PdfCleanUpTool;
|
||||
import com.itextpdf.pdfcleanup.PdfCleaner;
|
||||
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
|
||||
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
|
||||
|
||||
public class PdfContentRemover {
|
||||
|
||||
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
|
||||
private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf";
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
PdfReader reader = new PdfReader(SOURCE);
|
||||
PdfWriter writer = new PdfWriter(DESTINATION);
|
||||
PdfDocument pdfDocument = new PdfDocument(reader, writer);
|
||||
removeContentFromDocument(pdfDocument);
|
||||
pdfDocument.close();
|
||||
}
|
||||
|
||||
private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException {
|
||||
// 5.1. remove text
|
||||
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
|
||||
strategy.add(new RegexBasedCleanupStrategy("Baeldung"));
|
||||
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
|
||||
|
||||
// 5.2. remove other areas
|
||||
List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)), new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35)));
|
||||
PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties());
|
||||
cleaner.cleanUp();
|
||||
}
|
||||
|
||||
}
|
||||
+86
@@ -0,0 +1,86 @@
|
||||
package com.baeldung.pdfedition;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
|
||||
import com.itextpdf.forms.PdfAcroForm;
|
||||
import com.itextpdf.forms.fields.PdfFormField;
|
||||
import com.itextpdf.forms.fields.PdfTextFormField;
|
||||
import com.itextpdf.io.image.ImageData;
|
||||
import com.itextpdf.io.image.ImageDataFactory;
|
||||
import com.itextpdf.kernel.geom.Rectangle;
|
||||
import com.itextpdf.kernel.pdf.PdfDocument;
|
||||
import com.itextpdf.kernel.pdf.PdfReader;
|
||||
import com.itextpdf.kernel.pdf.PdfString;
|
||||
import com.itextpdf.kernel.pdf.PdfWriter;
|
||||
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
|
||||
import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
|
||||
import com.itextpdf.layout.Document;
|
||||
import com.itextpdf.layout.element.Image;
|
||||
import com.itextpdf.layout.element.Paragraph;
|
||||
import com.itextpdf.layout.element.Table;
|
||||
import com.itextpdf.layout.element.Text;
|
||||
import com.itextpdf.layout.properties.UnitValue;
|
||||
|
||||
public class PdfEditor {
|
||||
|
||||
private static final String SOURCE = "src/main/resources/baeldung.pdf";
|
||||
private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf";
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
PdfReader reader = new PdfReader(SOURCE);
|
||||
PdfWriter writer = new PdfWriter(DESTINATION);
|
||||
PdfDocument pdfDocument = new PdfDocument(reader, writer);
|
||||
addContentToDocument(pdfDocument);
|
||||
}
|
||||
|
||||
private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException {
|
||||
// 4.1. add form
|
||||
PdfFormField personal = PdfFormField.createEmptyField(pdfDocument);
|
||||
personal.setFieldName("information");
|
||||
PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", "");
|
||||
personal.addKid(name);
|
||||
PdfAcroForm.getAcroForm(pdfDocument, true)
|
||||
.addField(personal, pdfDocument.getFirstPage());
|
||||
|
||||
// 4.2. add new page
|
||||
pdfDocument.addNewPage(1);
|
||||
|
||||
// 4.3. add annotation
|
||||
PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name"))
|
||||
.setContents("Your name");
|
||||
pdfDocument.getPage(2)
|
||||
.addAnnotation(ann);
|
||||
|
||||
// create document form pdf document
|
||||
Document document = new Document(pdfDocument);
|
||||
|
||||
// 4.4. add an image
|
||||
ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png");
|
||||
Image image = new Image(imageData).scaleAbsolute(550, 100)
|
||||
.setFixedPosition(1, 10, 50);
|
||||
document.add(image);
|
||||
|
||||
// 4.5. add a paragraph
|
||||
Text title = new Text("This is a demo").setFontSize(16);
|
||||
Text author = new Text("Baeldung tutorials.");
|
||||
Paragraph p = new Paragraph().setFontSize(8)
|
||||
.add(title)
|
||||
.add(" from ")
|
||||
.add(author);
|
||||
document.add(p);
|
||||
|
||||
// 4.6. add a table
|
||||
Table table = new Table(UnitValue.createPercentArray(2));
|
||||
table.addHeaderCell("#");
|
||||
table.addHeaderCell("company");
|
||||
table.addCell("name");
|
||||
table.addCell("baeldung");
|
||||
document.add(table);
|
||||
|
||||
// close the document
|
||||
// this automatically closes the pdfDocument, which then closes automatically the pdfReader and pdfWriter
|
||||
document.close();
|
||||
}
|
||||
|
||||
}
|
||||
+45
@@ -0,0 +1,45 @@
|
||||
package com.baeldung.pdfedition;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import com.itextpdf.kernel.colors.ColorConstants;
|
||||
import com.itextpdf.kernel.pdf.PdfDocument;
|
||||
import com.itextpdf.kernel.pdf.PdfPage;
|
||||
import com.itextpdf.kernel.pdf.PdfReader;
|
||||
import com.itextpdf.kernel.pdf.PdfWriter;
|
||||
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
|
||||
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
|
||||
import com.itextpdf.layout.Canvas;
|
||||
import com.itextpdf.layout.element.Paragraph;
|
||||
import com.itextpdf.pdfcleanup.PdfCleaner;
|
||||
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
|
||||
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
|
||||
|
||||
public class PdfTextReplacement {
|
||||
|
||||
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
|
||||
private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf";
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
PdfReader reader = new PdfReader(SOURCE);
|
||||
PdfWriter writer = new PdfWriter(DESTINATION);
|
||||
PdfDocument pdfDocument = new PdfDocument(reader, writer);
|
||||
replaceTextContentFromDocument(pdfDocument);
|
||||
pdfDocument.close();
|
||||
}
|
||||
|
||||
private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException {
|
||||
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
|
||||
strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE));
|
||||
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
|
||||
|
||||
for (IPdfTextLocation location : strategy.getResultantLocations()) {
|
||||
PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1);
|
||||
PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument());
|
||||
Canvas canvas = new Canvas(pdfCanvas, location.getRectangle());
|
||||
canvas.add(new Paragraph("HIDDEN").setFontSize(8)
|
||||
.setMarginTop(0f));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
package com.baeldung.pdfinfo;
|
||||
|
||||
|
||||
import com.itextpdf.text.pdf.PdfReader;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
public class PdfInfoIText {
|
||||
|
||||
public static int getNumberOfPages(final String pdfFile) throws IOException {
|
||||
PdfReader reader = new PdfReader(pdfFile);
|
||||
int pages = reader.getNumberOfPages();
|
||||
reader.close();
|
||||
return pages;
|
||||
}
|
||||
|
||||
public static boolean isPasswordRequired(final String pdfFile) throws IOException {
|
||||
PdfReader reader = new PdfReader(pdfFile);
|
||||
boolean isEncrypted = reader.isEncrypted();
|
||||
reader.close();
|
||||
return isEncrypted;
|
||||
}
|
||||
|
||||
public static Map<String, String> getInfo(final String pdfFile) throws IOException {
|
||||
PdfReader reader = new PdfReader(pdfFile);
|
||||
Map<String, String> info = reader.getInfo();
|
||||
reader.close();
|
||||
return info;
|
||||
}
|
||||
}
|
||||
+36
@@ -0,0 +1,36 @@
|
||||
package com.baeldung.pdfinfo;
|
||||
|
||||
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
public class PdfInfoPdfBox {
|
||||
|
||||
public static int getNumberOfPages(final String pdfFile) throws IOException {
|
||||
File file = new File(pdfFile);
|
||||
PDDocument document = Loader.loadPDF(file);
|
||||
int pages = document.getNumberOfPages();
|
||||
document.close();
|
||||
return pages;
|
||||
}
|
||||
|
||||
public static boolean isPasswordRequired(final String pdfFile) throws IOException {
|
||||
File file = new File(pdfFile);
|
||||
PDDocument document = Loader.loadPDF(file);
|
||||
boolean isEncrypted = document.isEncrypted();
|
||||
document.close();
|
||||
return isEncrypted;
|
||||
}
|
||||
|
||||
public static PDDocumentInformation getInfo(final String pdfFile) throws IOException {
|
||||
File file = new File(pdfFile);
|
||||
PDDocument document = Loader.loadPDF(file);
|
||||
PDDocumentInformation info = document.getDocumentInformation();
|
||||
document.close();
|
||||
return info;
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
After Width: | Height: | Size: 4.5 KiB |
Binary file not shown.
@@ -0,0 +1,43 @@
|
||||
%PDF-1.4
|
||||
%âãÏÓ
|
||||
3 0 obj
|
||||
<</Length 751/Filter/FlateDecode>>stream
|
||||
xœ•WËRÛ0Ýû+î’.Puõ´–@Ÿ”R žÎtéi¤Íc0d:ü}%K&a¢«�daœãsïÑ‘tdªÓ¦’jn ™VXæ,îÞBÀšÛêèG?íz¸˜=>ÁU{×¾kþTÇéqÚyÞðçhøe¸çpŒj¨ÿ±©®«‡Š3'œBàL9.ü�@ý]�b}säŠ�ú®º-<ŽŠ³ÃBi&bHåX}CiÅT††.àÜûñÙ³5üË
|
||||
w’ üÐF4?ŒÍKѼ¼É–$îägW�&Iš$h’Dq
|
||||
'GÈìJ¢Ð(‰B£$
|
||||
�’H®ojGȯú]Iš$h’D IÅõMµ-¹D I�&Iš$Q\ßÔï�‚$M’4I"Ð$‰âú¦øb`fySh’D I�&I;èuH_Ÿ þ‹`-ãaK;h1zUˆ^3\×5„+:órõ9st¹b!i9Ü…dݪ…F±ÚV,Fü×ùŠ"T”`•|{ųõãÓjÑõcÅ—Z¶fJ�êŠl]^�¬5“õa#lVOí<_N9ôq~X¹«v6¥Ä�EŒ• *
|
||||
Xà!ãĬ)òu‹©^3O�û3¤lÈ_ºïÉrúíšp@8Æu‰¯
|
||||
«ó\ͳ%ê/Ò'ã4EeŸ
|
||||
Ìè“(ùT`ŸwËå3|Y}*ðýêv´OÒ–¸—´Q–$É=F�Ì�9£dÑ(ºïÅzö?)›ÛV�‰�›mÀ÷úD·.ø¤ER{|¢™Ñ'Uô‰fë–¼*/(í^;åQf⛥DéýaOH¦K{° ¢°ÇPÏìùW–ÅßoÂÛk=˜ƒmÒÁ¢BçÕªáfÎ=ÿŠ‹þ‘]“é^ÑdbxÉdšý½]®»9Lœ¶7gNÞkºŒÙrjÁ`$²yƒÁ>W7û›²Át¯h°)þó˳ÏîÛ~>ëà†íœç‘o“ß÷};Ý7´\©‘Li*hêöTüˆ
|
||||
endstream
|
||||
endobj
|
||||
5 0 obj
|
||||
<</Type/Page/MediaBox[0 0 595 842]/Resources<</Font<</F1 1 0 R/F2 2 0 R>>>>/Contents 3 0 R/Parent 4 0 R>>
|
||||
endobj
|
||||
1 0 obj
|
||||
<</Type/Font/Subtype/Type1/BaseFont/Helvetica-Bold/Encoding/WinAnsiEncoding>>
|
||||
endobj
|
||||
2 0 obj
|
||||
<</Type/Font/Subtype/Type1/BaseFont/Helvetica/Encoding/WinAnsiEncoding>>
|
||||
endobj
|
||||
4 0 obj
|
||||
<</Type/Pages/Count 1/Kids[5 0 R]>>
|
||||
endobj
|
||||
6 0 obj
|
||||
<</Type/Catalog/Pages 4 0 R>>
|
||||
endobj
|
||||
7 0 obj
|
||||
<</Producer(iText® 5.5.13.3 ©2000-2022 iText Group NV \(AGPL-version\))/CreationDate(D:20231213174247+08'00')/ModDate(D:20231213174247+08'00')>>
|
||||
endobj
|
||||
xref
|
||||
0 8
|
||||
0000000000 65535 f
|
||||
0000000954 00000 n
|
||||
0000001047 00000 n
|
||||
0000000015 00000 n
|
||||
0000001135 00000 n
|
||||
0000000833 00000 n
|
||||
0000001186 00000 n
|
||||
0000001231 00000 n
|
||||
trailer
|
||||
<</Size 8/Root 6 0 R/Info 7 0 R/ID [<6a28b1036b62f3808f3bfb62a88a5239><6a28b1036b62f3808f3bfb62a88a5239>]>>
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
package com.baeldung.pdfinfo;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class PdfInfoITextUnitTest {
|
||||
|
||||
private static final String PDF_FILE = "src/test/resources/input.pdf";
|
||||
|
||||
@Test
|
||||
void givenPdf_whenGetNumberOfPages_thenOK() throws IOException {
|
||||
assertEquals(4, PdfInfoIText.getNumberOfPages(PDF_FILE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void givenPdf_whenIsPasswordRequired_thenOK() throws IOException {
|
||||
assertFalse(PdfInfoIText.isPasswordRequired(PDF_FILE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void givenPdf_whenGetInfo_thenOK() throws IOException {
|
||||
Map<String, String> info = PdfInfoIText.getInfo(PDF_FILE);
|
||||
assertEquals("LibreOffice 4.2", info.get("Producer"));
|
||||
assertEquals("Writer", info.get("Creator"));
|
||||
}
|
||||
}
|
||||
+31
@@ -0,0 +1,31 @@
|
||||
package com.baeldung.pdfinfo;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertFalse;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
class PdfInfoPdfBoxUnitTest {
|
||||
|
||||
private static final String PDF_FILE = "src/test/resources/input.pdf";
|
||||
|
||||
@Test
|
||||
void givenPdf_whenGetNumberOfPages_thenOK() throws IOException {
|
||||
assertEquals(4, PdfInfoPdfBox.getNumberOfPages(PDF_FILE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void givenPdf_whenIsPasswordRequired_thenOK() throws IOException {
|
||||
assertFalse(PdfInfoPdfBox.isPasswordRequired(PDF_FILE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void givenPdf_whenGetInfo_thenOK() throws IOException {
|
||||
PDDocumentInformation info = PdfInfoPdfBox.getInfo(PDF_FILE);
|
||||
assertEquals("LibreOffice 4.2", info.getProducer());
|
||||
assertEquals("Writer", info.getCreator());
|
||||
}
|
||||
}
|
||||
Binary file not shown.
@@ -0,0 +1 @@
|
||||
/target/
|
||||
@@ -0,0 +1,11 @@
|
||||
## PDF
|
||||
|
||||
This module contains articles about PDF files.
|
||||
|
||||
### Relevant Articles:
|
||||
- [PDF Conversions in Java](https://www.baeldung.com/pdf-conversions-java)
|
||||
- [Creating PDF Files in Java](https://www.baeldung.com/java-pdf-creation)
|
||||
- [Generating PDF Files Using Thymeleaf](https://www.baeldung.com/thymeleaf-generate-pdf)
|
||||
- [Java Convert PDF to Base64](https://www.baeldung.com/java-convert-pdf-to-base64)
|
||||
- [HTML to PDF Using OpenPDF](https://www.baeldung.com/java-html-to-pdf)
|
||||
- [Reading PDF File Using Java](https://www.baeldung.com/java-pdf-file-read)
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,122 @@
|
||||
<?xml version="1.0"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>pdf</artifactId>
|
||||
<name>pdf</name>
|
||||
<url>http://maven.apache.org</url>
|
||||
|
||||
<parent>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<artifactId>text-processing-libraries-modules</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
<version>${pdfbox-tools.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
<groupId>commons-logging</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>net.sf.cssbox</groupId>
|
||||
<artifactId>pdf2dom</artifactId>
|
||||
<version>${pdf2dom.version}</version>
|
||||
<exclusions>
|
||||
<exclusion>
|
||||
<artifactId>commons-logging</artifactId>
|
||||
<groupId>commons-logging</groupId>
|
||||
</exclusion>
|
||||
</exclusions>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.itextpdf</groupId>
|
||||
<artifactId>itextpdf</artifactId>
|
||||
<version>${itextpdf.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.itextpdf.tool</groupId>
|
||||
<artifactId>xmlworker</artifactId>
|
||||
<version>${xmlworker.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>${poi-scratchpad.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.xmlgraphics</groupId>
|
||||
<artifactId>batik-transcoder</artifactId>
|
||||
<version>${batik-transcoder.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>${poi-ooxml.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.thymeleaf</groupId>
|
||||
<artifactId>thymeleaf</artifactId>
|
||||
<version>${thymeleaf.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.xhtmlrenderer</groupId>
|
||||
<artifactId>flying-saucer-pdf</artifactId>
|
||||
<version>${flying-saucer-pdf.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.xhtmlrenderer</groupId>
|
||||
<artifactId>flying-saucer-pdf-openpdf</artifactId>
|
||||
<version>${flying-saucer-pdf-openpdf.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.jsoup</groupId>
|
||||
<artifactId>jsoup</artifactId>
|
||||
<version>${jsoup.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.openhtmltopdf</groupId>
|
||||
<artifactId>openhtmltopdf-core</artifactId>
|
||||
<version>${open-html-pdf-core.version}</version>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>com.openhtmltopdf</groupId>
|
||||
<artifactId>openhtmltopdf-pdfbox</artifactId>
|
||||
<version>${open-html-pdfbox.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
<finalName>pdf</finalName>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>src/main/resources</directory>
|
||||
<filtering>true</filtering>
|
||||
</resource>
|
||||
</resources>
|
||||
</build>
|
||||
|
||||
<properties>
|
||||
<pdfbox-tools.version>3.0.0</pdfbox-tools.version>
|
||||
<pdf2dom.version>2.0.1</pdf2dom.version>
|
||||
<itextpdf.version>5.5.13.3</itextpdf.version>
|
||||
<xmlworker.version>5.5.10</xmlworker.version>
|
||||
<poi-scratchpad.version>3.15</poi-scratchpad.version>
|
||||
<batik-transcoder.version>1.8</batik-transcoder.version>
|
||||
<poi-ooxml.version>3.15</poi-ooxml.version>
|
||||
<thymeleaf.version>3.1.2.RELEASE</thymeleaf.version>
|
||||
<flying-saucer-pdf.version>9.3.1</flying-saucer-pdf.version>
|
||||
<open-html-pdfbox.version>1.0.6</open-html-pdfbox.version>
|
||||
<open-html-pdf-core.version>1.0.10</open-html-pdf-core.version>
|
||||
<flying-saucer-pdf-openpdf.version>9.2.1</flying-saucer-pdf-openpdf.version>
|
||||
<jsoup.version>1.16.2</jsoup.version>
|
||||
</properties>
|
||||
|
||||
</project>
|
||||
Binary file not shown.
+35
@@ -0,0 +1,35 @@
|
||||
package com.baeldung.pdf;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Paragraph;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.List;
|
||||
|
||||
public class DocxToPDFExample {
|
||||
|
||||
public static void main(String[] args) throws IOException, DocumentException {
|
||||
InputStream docxInputStream = new FileInputStream("input.docx");
|
||||
try (XWPFDocument document = new XWPFDocument(docxInputStream);
|
||||
OutputStream pdfOutputStream = new FileOutputStream("output.pdf");) {
|
||||
Document pdfDocument = new Document();
|
||||
PdfWriter.getInstance(pdfDocument, pdfOutputStream);
|
||||
pdfDocument.open();
|
||||
|
||||
List<XWPFParagraph> paragraphs = document.getParagraphs();
|
||||
for (XWPFParagraph paragraph : paragraphs) {
|
||||
pdfDocument.add(new Paragraph(paragraph.getText()));
|
||||
}
|
||||
pdfDocument.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
package com.baeldung.pdf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
import java.io.Writer;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.fit.pdfdom.PDFDomTree;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
import com.itextpdf.tool.xml.XMLWorkerHelper;
|
||||
|
||||
public class PDF2HTMLExample {
|
||||
|
||||
private static final String PDF = "src/main/resources/pdf.pdf";
|
||||
private static final String HTML = "src/main/resources/html.html";
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
generateHTMLFromPDF(PDF);
|
||||
generatePDFFromHTML(HTML);
|
||||
} catch (IOException | ParserConfigurationException | DocumentException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void generateHTMLFromPDF(String filename) throws ParserConfigurationException, IOException {
|
||||
PDDocument pdf = PDDocument.load(new File(filename));
|
||||
PDFDomTree parser = new PDFDomTree();
|
||||
Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
|
||||
parser.writeText(pdf, output);
|
||||
output.close();
|
||||
if (pdf != null) {
|
||||
pdf.close();
|
||||
}
|
||||
}
|
||||
|
||||
private static void generatePDFFromHTML(String filename) throws ParserConfigurationException, IOException, DocumentException {
|
||||
Document document = new Document();
|
||||
PdfWriter writer = PdfWriter.getInstance(document, new FileOutputStream("src/output/html.pdf"));
|
||||
document.open();
|
||||
XMLWorkerHelper.getInstance().parseXHtml(writer, document, new FileInputStream(filename));
|
||||
document.close();
|
||||
}
|
||||
}
|
||||
+62
@@ -0,0 +1,62 @@
|
||||
package com.baeldung.pdf;
|
||||
|
||||
import java.awt.image.BufferedImage;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.rendering.ImageType;
|
||||
import org.apache.pdfbox.rendering.PDFRenderer;
|
||||
import org.apache.pdfbox.tools.imageio.ImageIOUtil;
|
||||
|
||||
import com.itextpdf.text.BadElementException;
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Image;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
public class PDF2ImageExample {
|
||||
|
||||
private static final String PDF = "src/main/resources/pdf.pdf";
|
||||
private static final String JPG = "http://cdn2.baeldung.netdna-cdn.com/wp-content/uploads/2016/05/baeldung-rest-widget-main-1.2.0";
|
||||
private static final String GIF = "https://media.giphy.com/media/l3V0x6kdXUW9M4ONq/giphy";
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
generateImageFromPDF(PDF, "png");
|
||||
generateImageFromPDF(PDF, "jpeg");
|
||||
generateImageFromPDF(PDF, "gif");
|
||||
generatePDFFromImage(JPG, "jpg");
|
||||
generatePDFFromImage(GIF, "gif");
|
||||
} catch (IOException | DocumentException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void generateImageFromPDF(String filename, String extension) throws IOException {
|
||||
PDDocument document = PDDocument.load(new File(filename));
|
||||
PDFRenderer pdfRenderer = new PDFRenderer(document);
|
||||
for (int page = 0; page < document.getNumberOfPages(); ++page) {
|
||||
BufferedImage bim = pdfRenderer.renderImageWithDPI(page, 300, ImageType.RGB);
|
||||
ImageIOUtil.writeImage(bim, String.format("src/output/pdf-%d.%s", page + 1, extension), 300);
|
||||
}
|
||||
document.close();
|
||||
}
|
||||
|
||||
private static void generatePDFFromImage(String filename, String extension)
|
||||
throws IOException, BadElementException, DocumentException {
|
||||
Document document = new Document();
|
||||
String input = filename + "." + extension;
|
||||
String output = "src/output/" + extension + ".pdf";
|
||||
FileOutputStream fos = new FileOutputStream(output);
|
||||
PdfWriter writer = PdfWriter.getInstance(document, fos);
|
||||
writer.open();
|
||||
document.open();
|
||||
document.add(Image.getInstance((new URL(input))));
|
||||
document.close();
|
||||
writer.close();
|
||||
}
|
||||
|
||||
}
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
package com.baeldung.pdf;
|
||||
|
||||
import java.io.BufferedReader;
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.FileReader;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintWriter;
|
||||
|
||||
import org.apache.pdfbox.cos.COSDocument;
|
||||
import org.apache.pdfbox.io.RandomAccessFile;
|
||||
import org.apache.pdfbox.pdfparser.PDFParser;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.DocumentException;
|
||||
import com.itextpdf.text.Element;
|
||||
import com.itextpdf.text.Font;
|
||||
import com.itextpdf.text.PageSize;
|
||||
import com.itextpdf.text.Paragraph;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
public class PDF2TextExample {
|
||||
|
||||
private static final String PDF = "src/main/resources/pdf.pdf";
|
||||
private static final String TXT = "src/main/resources/txt.txt";
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
generateTxtFromPDF(PDF);
|
||||
generatePDFFromTxt(TXT);
|
||||
} catch (IOException | DocumentException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void generateTxtFromPDF(String filename) throws IOException {
|
||||
File f = new File(filename);
|
||||
String parsedText;
|
||||
PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
|
||||
parser.parse();
|
||||
|
||||
COSDocument cosDoc = parser.getDocument();
|
||||
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
PDDocument pdDoc = new PDDocument(cosDoc);
|
||||
|
||||
parsedText = pdfStripper.getText(pdDoc);
|
||||
|
||||
if (cosDoc != null)
|
||||
cosDoc.close();
|
||||
if (pdDoc != null)
|
||||
pdDoc.close();
|
||||
|
||||
PrintWriter pw = new PrintWriter("src/output/pdf.txt");
|
||||
pw.print(parsedText);
|
||||
pw.close();
|
||||
}
|
||||
|
||||
private static void generatePDFFromTxt(String filename) throws IOException, DocumentException {
|
||||
Document pdfDoc = new Document(PageSize.A4);
|
||||
PdfWriter.getInstance(pdfDoc, new FileOutputStream("src/output/txt.pdf"))
|
||||
.setPdfVersion(PdfWriter.PDF_VERSION_1_7);
|
||||
pdfDoc.open();
|
||||
|
||||
Font myfont = new Font();
|
||||
myfont.setStyle(Font.NORMAL);
|
||||
myfont.setSize(11);
|
||||
pdfDoc.add(new Paragraph("\n"));
|
||||
|
||||
BufferedReader br = new BufferedReader(new FileReader(filename));
|
||||
String strLine;
|
||||
while ((strLine = br.readLine()) != null) {
|
||||
Paragraph para = new Paragraph(strLine + "\n", myfont);
|
||||
para.setAlignment(Element.ALIGN_JUSTIFIED);
|
||||
pdfDoc.add(para);
|
||||
}
|
||||
|
||||
pdfDoc.close();
|
||||
br.close();
|
||||
}
|
||||
|
||||
}
|
||||
+50
@@ -0,0 +1,50 @@
|
||||
package com.baeldung.pdf;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.poi.xwpf.usermodel.BreakType;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFDocument;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
|
||||
import org.apache.poi.xwpf.usermodel.XWPFRun;
|
||||
|
||||
import com.itextpdf.text.pdf.PdfReader;
|
||||
import com.itextpdf.text.pdf.parser.PdfReaderContentParser;
|
||||
import com.itextpdf.text.pdf.parser.SimpleTextExtractionStrategy;
|
||||
import com.itextpdf.text.pdf.parser.TextExtractionStrategy;
|
||||
|
||||
public class PDF2WordExample {
|
||||
|
||||
private static final String FILENAME = "src/main/resources/pdf.pdf";
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
generateDocFromPDF(FILENAME);
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void generateDocFromPDF(String filename) throws IOException {
|
||||
XWPFDocument doc = new XWPFDocument();
|
||||
|
||||
String pdf = filename;
|
||||
PdfReader reader = new PdfReader(pdf);
|
||||
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
|
||||
|
||||
for (int i = 1; i <= reader.getNumberOfPages(); i++) {
|
||||
TextExtractionStrategy strategy = parser.processContent(i, new SimpleTextExtractionStrategy());
|
||||
String text = strategy.getResultantText();
|
||||
XWPFParagraph p = doc.createParagraph();
|
||||
XWPFRun run = p.createRun();
|
||||
run.setText(text);
|
||||
run.addBreak(BreakType.PAGE);
|
||||
}
|
||||
FileOutputStream out = new FileOutputStream("src/output/pdf.docx");
|
||||
doc.write(out);
|
||||
out.close();
|
||||
reader.close();
|
||||
doc.close();
|
||||
}
|
||||
|
||||
}
|
||||
+77
@@ -0,0 +1,77 @@
|
||||
package com.baeldung.pdf;
|
||||
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.net.URISyntaxException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
import com.itextpdf.text.BadElementException;
|
||||
import com.itextpdf.text.BaseColor;
|
||||
import com.itextpdf.text.Document;
|
||||
import com.itextpdf.text.Element;
|
||||
import com.itextpdf.text.Image;
|
||||
import com.itextpdf.text.Phrase;
|
||||
import com.itextpdf.text.pdf.PdfPCell;
|
||||
import com.itextpdf.text.pdf.PdfPTable;
|
||||
import com.itextpdf.text.pdf.PdfWriter;
|
||||
|
||||
public class PDFSampleMain {
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
try {
|
||||
|
||||
Document document = new Document();
|
||||
PdfWriter.getInstance(document, new FileOutputStream("iTextTable.pdf"));
|
||||
|
||||
document.open();
|
||||
|
||||
PdfPTable table = new PdfPTable(3);
|
||||
addTableHeader(table);
|
||||
addRows(table);
|
||||
addCustomRows(table);
|
||||
|
||||
document.add(table);
|
||||
document.close();
|
||||
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private static void addTableHeader(PdfPTable table) {
|
||||
Stream.of("column header 1", "column header 2", "column header 3")
|
||||
.forEach(columnTitle -> {
|
||||
PdfPCell header = new PdfPCell();
|
||||
header.setBackgroundColor(BaseColor.LIGHT_GRAY);
|
||||
header.setBorderWidth(2);
|
||||
header.setPhrase(new Phrase(columnTitle));
|
||||
table.addCell(header);
|
||||
});
|
||||
}
|
||||
|
||||
private static void addRows(PdfPTable table) {
|
||||
table.addCell("row 1, col 1");
|
||||
table.addCell("row 1, col 2");
|
||||
table.addCell("row 1, col 3");
|
||||
}
|
||||
|
||||
private static void addCustomRows(PdfPTable table) throws URISyntaxException, BadElementException, IOException {
|
||||
Path path = Paths.get(ClassLoader.getSystemResource("Java_logo.png").toURI());
|
||||
Image img = Image.getInstance(path.toAbsolutePath().toString());
|
||||
img.scalePercent(10);
|
||||
|
||||
PdfPCell imageCell = new PdfPCell(img);
|
||||
table.addCell(imageCell);
|
||||
|
||||
PdfPCell horizontalAlignCell = new PdfPCell(new Phrase("row 2, col 2"));
|
||||
horizontalAlignCell.setHorizontalAlignment(Element.ALIGN_CENTER);
|
||||
table.addCell(horizontalAlignCell);
|
||||
|
||||
PdfPCell verticalAlignCell = new PdfPCell(new Phrase("row 2, col 3"));
|
||||
verticalAlignCell.setVerticalAlignment(Element.ALIGN_BOTTOM);
|
||||
table.addCell(verticalAlignCell);
|
||||
}
|
||||
}
|
||||
+56
@@ -0,0 +1,56 @@
|
||||
package com.baeldung.pdf.openpdf;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.w3c.dom.Element;
|
||||
import org.xhtmlrenderer.extend.FSImage;
|
||||
import org.xhtmlrenderer.extend.ReplacedElement;
|
||||
import org.xhtmlrenderer.extend.ReplacedElementFactory;
|
||||
import org.xhtmlrenderer.extend.UserAgentCallback;
|
||||
import org.xhtmlrenderer.layout.LayoutContext;
|
||||
import org.xhtmlrenderer.pdf.ITextFSImage;
|
||||
import org.xhtmlrenderer.pdf.ITextImageElement;
|
||||
import org.xhtmlrenderer.render.BlockBox;
|
||||
import org.xhtmlrenderer.simple.extend.FormSubmissionListener;
|
||||
|
||||
import com.lowagie.text.Image;
|
||||
|
||||
public class CustomElementFactoryImpl implements ReplacedElementFactory {
|
||||
@Override
|
||||
public ReplacedElement createReplacedElement(LayoutContext lc, BlockBox box, UserAgentCallback uac, int cssWidth, int cssHeight) {
|
||||
Element e = box.getElement();
|
||||
String nodeName = e.getNodeName();
|
||||
if (nodeName.equals("img")) {
|
||||
String imagePath = e.getAttribute("src");
|
||||
try {
|
||||
InputStream input = new FileInputStream("src/main/resources/" + imagePath);
|
||||
byte[] bytes = IOUtils.toByteArray(input);
|
||||
Image image = Image.getInstance(bytes);
|
||||
FSImage fsImage = new ITextFSImage(image);
|
||||
if (cssWidth != -1 || cssHeight != -1) {
|
||||
fsImage.scale(cssWidth, cssHeight);
|
||||
} else {
|
||||
fsImage.scale(2000, 1000);
|
||||
}
|
||||
return new ITextImageElement(fsImage);
|
||||
} catch (Exception e1) {
|
||||
e1.printStackTrace();
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void remove(Element e) {
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setFormSubmissionListener(FormSubmissionListener listener) {
|
||||
}
|
||||
}
|
||||
+53
@@ -0,0 +1,53 @@
|
||||
package com.baeldung.pdf.openpdf;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.xhtmlrenderer.layout.SharedContext;
|
||||
import org.xhtmlrenderer.pdf.ITextRenderer;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
public class Html2PdfUsingFlyingSaucer {
|
||||
|
||||
private static final String HTML_INPUT = "src/main/resources/htmlforopenpdf.html";
|
||||
private static final String PDF_OUTPUT = "src/main/resources/html2pdf.pdf";
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
Html2PdfUsingFlyingSaucer htmlToPdf = new Html2PdfUsingFlyingSaucer();
|
||||
htmlToPdf.generateHtmlToPdf();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private void generateHtmlToPdf() throws Exception {
|
||||
File inputHTML = new File(HTML_INPUT);
|
||||
Document inputHtml = createWellFormedHtml(inputHTML);
|
||||
File outputPdf = new File(PDF_OUTPUT);
|
||||
xhtmlToPdf(inputHtml, outputPdf);
|
||||
}
|
||||
|
||||
private Document createWellFormedHtml(File inputHTML) throws IOException {
|
||||
Document document = Jsoup.parse(inputHTML, "UTF-8");
|
||||
document.outputSettings()
|
||||
.syntax(Document.OutputSettings.Syntax.xml);
|
||||
return document;
|
||||
}
|
||||
|
||||
private void xhtmlToPdf(Document xhtml, File outputPdf) throws Exception {
|
||||
try (OutputStream outputStream = new FileOutputStream(outputPdf)) {
|
||||
ITextRenderer renderer = new ITextRenderer();
|
||||
SharedContext sharedContext = renderer.getSharedContext();
|
||||
sharedContext.setPrint(true);
|
||||
sharedContext.setInteractive(false);
|
||||
sharedContext.setReplacedElementFactory(new CustomElementFactoryImpl());
|
||||
renderer.setDocumentFromString(xhtml.html());
|
||||
renderer.layout();
|
||||
renderer.createPDF(outputStream);
|
||||
}
|
||||
}
|
||||
}
|
||||
+55
@@ -0,0 +1,55 @@
|
||||
package com.baeldung.pdf.openpdf;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.FileSystems;
|
||||
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.helper.W3CDom;
|
||||
import org.jsoup.nodes.Document;
|
||||
|
||||
import com.openhtmltopdf.pdfboxout.PdfRendererBuilder;
|
||||
|
||||
public class Html2PdfUsingOpenHtml {
|
||||
|
||||
private static final String HTML_INPUT = "src/main/resources/htmlforopenpdf.html";
|
||||
private static final String PDF_OUTPUT = "src/main/resources/html2pdf.pdf";
|
||||
|
||||
public static void main(String[] args) {
|
||||
try {
|
||||
Html2PdfUsingOpenHtml htmlToPdf = new Html2PdfUsingOpenHtml();
|
||||
htmlToPdf.generateHtmlToPdf();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
private void generateHtmlToPdf() throws IOException {
|
||||
File inputHTML = new File(HTML_INPUT);
|
||||
Document doc = createWellFormedHtml(inputHTML);
|
||||
xhtmlToPdf(doc, PDF_OUTPUT);
|
||||
}
|
||||
|
||||
private Document createWellFormedHtml(File inputHTML) throws IOException {
|
||||
Document document = Jsoup.parse(inputHTML, "UTF-8");
|
||||
document.outputSettings()
|
||||
.syntax(Document.OutputSettings.Syntax.xml);
|
||||
return document;
|
||||
}
|
||||
|
||||
private void xhtmlToPdf(Document doc, String outputPdf) throws IOException {
|
||||
try (OutputStream os = new FileOutputStream(outputPdf)) {
|
||||
String baseUri = FileSystems.getDefault()
|
||||
.getPath("src/main/resources/")
|
||||
.toUri()
|
||||
.toString();
|
||||
PdfRendererBuilder builder = new PdfRendererBuilder();
|
||||
builder.withUri(outputPdf);
|
||||
builder.toStream(os);
|
||||
builder.withW3cDocument(new W3CDom().fromJsoup(doc), baseUri);
|
||||
builder.run();
|
||||
}
|
||||
}
|
||||
}
|
||||
+48
@@ -0,0 +1,48 @@
|
||||
package com.baeldung.pdfthymeleaf;
|
||||
|
||||
import com.lowagie.text.DocumentException;
|
||||
import org.thymeleaf.TemplateEngine;
|
||||
import org.thymeleaf.context.Context;
|
||||
import org.thymeleaf.templatemode.TemplateMode;
|
||||
import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
|
||||
import org.xhtmlrenderer.pdf.ITextRenderer;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
public class PDFThymeleafExample {
|
||||
|
||||
public static void main(String[] args) throws IOException, DocumentException {
|
||||
PDFThymeleafExample thymeleaf2Pdf = new PDFThymeleafExample();
|
||||
String html = thymeleaf2Pdf.parseThymeleafTemplate();
|
||||
thymeleaf2Pdf.generatePdfFromHtml(html);
|
||||
}
|
||||
|
||||
public void generatePdfFromHtml(String html) throws IOException, DocumentException {
|
||||
String outputFolder = System.getProperty("user.home") + File.separator + "thymeleaf.pdf";
|
||||
OutputStream outputStream = new FileOutputStream(outputFolder);
|
||||
|
||||
ITextRenderer renderer = new ITextRenderer();
|
||||
renderer.setDocumentFromString(html);
|
||||
renderer.layout();
|
||||
renderer.createPDF(outputStream);
|
||||
|
||||
outputStream.close();
|
||||
}
|
||||
|
||||
private String parseThymeleafTemplate() {
|
||||
ClassLoaderTemplateResolver templateResolver = new ClassLoaderTemplateResolver();
|
||||
templateResolver.setSuffix(".html");
|
||||
templateResolver.setTemplateMode(TemplateMode.HTML);
|
||||
|
||||
TemplateEngine templateEngine = new TemplateEngine();
|
||||
templateEngine.setTemplateResolver(templateResolver);
|
||||
|
||||
Context context = new Context();
|
||||
context.setVariable("to", "Baeldung.com");
|
||||
|
||||
return templateEngine.process("thymeleaf_template", context);
|
||||
}
|
||||
}
|
||||
Binary file not shown.
|
After Width: | Height: | Size: 14 KiB |
@@ -0,0 +1,53 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
|
||||
<head>
|
||||
<title>A very simple webpage</title>
|
||||
</head>
|
||||
|
||||
<body>
|
||||
<h1>A very simple webpage. This is an "h1" level header.</h1>
|
||||
|
||||
<h2>This is a level h2 header.</h2>
|
||||
|
||||
<h6>This is a level h6 header. Pretty small!</h6>
|
||||
|
||||
<p>This is a standard paragraph.</p>
|
||||
|
||||
<p align=center>Now I've aligned it in the center of the screen.</p>
|
||||
|
||||
<p align=right>Now aligned to the right</p>
|
||||
|
||||
<p><b>Bold text</b></p>
|
||||
|
||||
<p><strong>Strongly emphasized text</strong> Can you tell the difference vs. bold?</p>
|
||||
|
||||
<p><i>Italics</i></p>
|
||||
|
||||
<p><em>Emphasized text</em> Just like Italics!</p>
|
||||
|
||||
<h2>How about a nice ordered list!</h2>
|
||||
<ol>
|
||||
<li>This little piggy went to market</li>
|
||||
<li>This little piggy went to SB228 class</li>
|
||||
<li>This little piggy went to an expensive restaurant in Downtown Palo Alto</li>
|
||||
<li>This little piggy ate too much at Indian Buffet.</li>
|
||||
<li>This little piggy got lost</li>
|
||||
</ol>
|
||||
|
||||
<h2>Unordered list</h2>
|
||||
<ul>
|
||||
<li>First element</li>
|
||||
<li>Second element</li>
|
||||
<li>Third element</li>
|
||||
</ul>
|
||||
|
||||
|
||||
<p>And finally, how about some</p><a href="http://www.google.com/">Links?</a>
|
||||
|
||||
<p>Remember, you can view the HTML code from this or any other page by using the "View Page Source" command of your browser.</p>
|
||||
|
||||
</body>
|
||||
|
||||
</html>
|
||||
|
||||
Binary file not shown.
@@ -0,0 +1,26 @@
|
||||
<html>
|
||||
<head>
|
||||
<style>
|
||||
.center_div {
|
||||
border: 1px solid gray;
|
||||
margin-left: auto;
|
||||
margin-right: auto;
|
||||
width: 90%;
|
||||
background-color: #d0f0f6;
|
||||
text-align: left;
|
||||
padding: 8px;
|
||||
}
|
||||
</style>
|
||||
<link href="style.css" rel="stylesheet">
|
||||
</head>
|
||||
<body>
|
||||
<div class="center_div">
|
||||
<h1>Hello Baeldung!</h1>
|
||||
<img src="Java_logo.png">
|
||||
|
||||
<div class="myclass">
|
||||
<p>This is the tutorial to convert html to pdf.</p>
|
||||
</div>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,13 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
|
||||
<encoder>
|
||||
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
|
||||
</pattern>
|
||||
</encoder>
|
||||
</appender>
|
||||
|
||||
<root level="INFO">
|
||||
<appender-ref ref="STDOUT" />
|
||||
</root>
|
||||
</configuration>
|
||||
Binary file not shown.
@@ -0,0 +1,6 @@
|
||||
/* Text styling for elements carrying the "myclass" class. */
.myclass{
    font-family: Helvetica, sans-serif;
    /* A non-zero length needs a unit; a bare "25" is not a valid font-size value. */
    font-size: 25px;
    font-weight: normal;
    color: blue;
}
|
||||
@@ -0,0 +1,7 @@
|
||||
<html lang="en" xmlns:th="http://www.thymeleaf.org">
|
||||
<body>
|
||||
<h3 style="text-align: center; color: green">
|
||||
<span th:text="'Welcome to ' + ${to} + '!'"></span>
|
||||
</h3>
|
||||
</body>
|
||||
</html>
|
||||
@@ -0,0 +1,3 @@
|
||||
Test
|
||||
Text
|
||||
Test TEST
|
||||
+84
@@ -0,0 +1,84 @@
|
||||
package com.baeldung.pdf.base64;
|
||||
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
|
||||
import static org.junit.Assert.*;
|
||||
|
||||
public class EncodeDecodeUnitTest {
|
||||
|
||||
private static final String IN_FILE = "src/test/resources/input.pdf";
|
||||
private static final String OUT_FILE = "src/test/resources/output.pdf";
|
||||
private static byte[] inFileBytes;
|
||||
|
||||
@BeforeClass
|
||||
public static void fileToByteArray() throws IOException {
|
||||
inFileBytes = Files.readAllBytes(Paths.get(IN_FILE));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenJavaBase64_whenEncoded_thenDecodedOK() throws IOException {
|
||||
|
||||
byte[] encoded = java.util.Base64.getEncoder().encode(inFileBytes);
|
||||
|
||||
byte[] decoded = java.util.Base64.getDecoder().decode(encoded);
|
||||
|
||||
writeToFile(OUT_FILE, decoded);
|
||||
|
||||
assertNotEquals(encoded.length, decoded.length);
|
||||
assertEquals(inFileBytes.length, decoded.length);
|
||||
|
||||
assertArrayEquals(decoded, inFileBytes);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenJavaBase64_whenEncodedStream_thenDecodedStreamOK() throws IOException {
|
||||
|
||||
try (OutputStream os = java.util.Base64.getEncoder().wrap(new FileOutputStream(OUT_FILE));
|
||||
FileInputStream fis = new FileInputStream(IN_FILE)) {
|
||||
byte[] bytes = new byte[1024];
|
||||
int read;
|
||||
while ((read = fis.read(bytes)) > -1) {
|
||||
os.write(bytes, 0, read);
|
||||
}
|
||||
}
|
||||
|
||||
byte[] encoded = java.util.Base64.getEncoder().encode(inFileBytes);
|
||||
byte[] encodedOnDisk = Files.readAllBytes(Paths.get(OUT_FILE));
|
||||
assertArrayEquals(encoded, encodedOnDisk);
|
||||
|
||||
byte[] decoded = java.util.Base64.getDecoder().decode(encoded);
|
||||
byte[] decodedOnDisk = java.util.Base64.getDecoder().decode(encodedOnDisk);
|
||||
assertArrayEquals(decoded, decodedOnDisk);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenApacheCommons_givenJavaBase64_whenEncoded_thenDecodedOK() throws IOException {
|
||||
|
||||
byte[] encoded = org.apache.commons.codec.binary.Base64.encodeBase64(inFileBytes);
|
||||
|
||||
byte[] decoded = org.apache.commons.codec.binary.Base64.decodeBase64(encoded);
|
||||
|
||||
writeToFile(OUT_FILE, decoded);
|
||||
|
||||
assertNotEquals(encoded.length, decoded.length);
|
||||
assertEquals(inFileBytes.length, decoded.length);
|
||||
|
||||
assertArrayEquals(decoded, inFileBytes);
|
||||
}
|
||||
|
||||
private void writeToFile(String fileName, byte[] bytes) throws IOException {
|
||||
FileOutputStream fos = new FileOutputStream(fileName);
|
||||
fos.write(bytes);
|
||||
fos.flush();
|
||||
fos.close();
|
||||
}
|
||||
}
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
package com.baeldung.pdfreadertest;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import com.itextpdf.text.pdf.PdfReader;
|
||||
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
|
||||
|
||||
class ReadPdfFileUnitTest {
|
||||
|
||||
@Test
|
||||
public void givenSamplePdf_whenUsingApachePdfBox_thenCompareOutput() throws IOException {
|
||||
String expectedText = "Hello World!\n";
|
||||
|
||||
File file = new File("sample.pdf");
|
||||
PDDocument document = PDDocument.load(file);
|
||||
|
||||
PDFTextStripper stripper = new PDFTextStripper();
|
||||
|
||||
String text = stripper.getText(document);
|
||||
|
||||
document.close();
|
||||
|
||||
assertEquals(expectedText, text);
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void givenSamplePdf_whenUsingiTextPdf_thenCompareOutput() throws IOException {
|
||||
String expectedText = "Hello World!";
|
||||
|
||||
PdfReader reader = new PdfReader("sample.pdf");
|
||||
int pages = reader.getNumberOfPages();
|
||||
StringBuilder text = new StringBuilder();
|
||||
|
||||
for (int i = 1; i <= pages; i++) {
|
||||
|
||||
text.append(PdfTextExtractor.getTextFromPage(reader, i));
|
||||
|
||||
}
|
||||
reader.close();
|
||||
assertEquals(expectedText, text.toString());
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
+52
@@ -0,0 +1,52 @@
|
||||
package com.baeldung.pdfthymeleaf;
|
||||
|
||||
import com.lowagie.text.DocumentException;
|
||||
import org.junit.Test;
|
||||
import org.thymeleaf.TemplateEngine;
|
||||
import org.thymeleaf.context.Context;
|
||||
import org.thymeleaf.templatemode.TemplateMode;
|
||||
import org.thymeleaf.templateresolver.ClassLoaderTemplateResolver;
|
||||
import org.xhtmlrenderer.pdf.ITextRenderer;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
|
||||
import static org.junit.Assert.assertTrue;
|
||||
|
||||
public class PDFThymeleafUnitTest {
|
||||
|
||||
@Test
|
||||
public void givenThymeleafTemplate_whenParsedAndRenderedToPDF_thenItShouldNotBeEmpty() throws DocumentException, IOException {
|
||||
String html = parseThymeleafTemplate();
|
||||
|
||||
ByteArrayOutputStream outputStream = generatePdfOutputStreamFromHtml(html);
|
||||
|
||||
assertTrue(outputStream.size() > 0);
|
||||
}
|
||||
|
||||
private ByteArrayOutputStream generatePdfOutputStreamFromHtml(String html) throws IOException, DocumentException {
|
||||
ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
|
||||
|
||||
ITextRenderer renderer = new ITextRenderer();
|
||||
renderer.setDocumentFromString(html);
|
||||
renderer.layout();
|
||||
renderer.createPDF(outputStream);
|
||||
|
||||
outputStream.close();
|
||||
return outputStream;
|
||||
}
|
||||
|
||||
private String parseThymeleafTemplate() {
|
||||
ClassLoaderTemplateResolver templateResolver = new ClassLoaderTemplateResolver();
|
||||
templateResolver.setSuffix(".html");
|
||||
templateResolver.setTemplateMode(TemplateMode.HTML);
|
||||
|
||||
TemplateEngine templateEngine = new TemplateEngine();
|
||||
templateEngine.setTemplateResolver(templateResolver);
|
||||
|
||||
Context context = new Context();
|
||||
context.setVariable("to", "Baeldung.com");
|
||||
|
||||
return templateEngine.process("thymeleaf_template", context);
|
||||
}
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,24 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<artifactId>text-processing-libraries-modules</artifactId>
|
||||
<name>text-processing-libraries-modules</name>
|
||||
<packaging>pom</packaging>
|
||||
|
||||
<parent>
|
||||
<artifactId>parent-modules</artifactId>
|
||||
<groupId>com.baeldung</groupId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
</parent>
|
||||
|
||||
<modules>
|
||||
<module>antlr</module>
|
||||
<module>apache-tika</module>
|
||||
<module>asciidoctor</module>
|
||||
<module>pdf</module>
|
||||
<module>pdf-2</module>
|
||||
</modules>
|
||||
|
||||
</project>
|
||||
Reference in New Issue
Block a user