diff --git a/jsoup/pom.xml b/jsoup/pom.xml
index 989f30422c..343e139b46 100644
--- a/jsoup/pom.xml
+++ b/jsoup/pom.xml
@@ -15,15 +15,16 @@
${jsoup.version}
- commons-io
- commons-io
- ${commons.io.version}
+ junit
+ junit
+ 4.12
+ test
1.8
1.8
- 2.5
+
1.10.1
diff --git a/jsoup/src/main/java/com/baeldung/jsoup/JsoupExample.java b/jsoup/src/main/java/com/baeldung/jsoup/JsoupParser.java
similarity index 66%
rename from jsoup/src/main/java/com/baeldung/jsoup/JsoupExample.java
rename to jsoup/src/main/java/com/baeldung/jsoup/JsoupParser.java
index 10431a621d..cb86b16888 100644
--- a/jsoup/src/main/java/com/baeldung/jsoup/JsoupExample.java
+++ b/jsoup/src/main/java/com/baeldung/jsoup/JsoupParser.java
@@ -1,47 +1,33 @@
package com.baeldung.jsoup;
-import java.io.File;
import java.io.IOException;
-import org.apache.commons.io.FileUtils;
-import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
-public class JsoupExample {
+public class JsoupParser {
- public static void main(String[] args) throws IOException {
- scrapeSpringBlog();
+ Document doc;
+
+ /**
+ * Fetches the page at {@code blogUrl} with {@code Jsoup.connect(blogUrl).get()}
+ * and stores the parsed result in the {@code doc} field, replacing any
+ * previously loaded document.
+ *
+ * @param blogUrl URL of the page to load
+ * @throws IOException if the request fails; the accompanying test expects an
+ * {@code HttpStatusException} with status code 404 for a missing page
+ */
+ public void loadDocument(String blogUrl) throws IOException {
+ doc = Jsoup.connect(blogUrl).get();
}
- static void scrapeSpringBlog() throws IOException {
- String blogUrl = "https://spring.io/blog";
- Document doc = Jsoup.connect(blogUrl).get();
-
- try {
- Document doc404 = Jsoup.connect("https://spring.io/will-not-be-found").get();
- } catch (HttpStatusException ex) {
- System.out.println(ex.getMessage());
- }
-
- Document docCustomConn = Jsoup.connect(blogUrl).userAgent("Mozilla").get();
- docCustomConn = Jsoup.connect(blogUrl).timeout(5000).get();
- docCustomConn = Jsoup.connect(blogUrl).cookie("cookiename", "val234").get();
- // docCustomConn = Jsoup.connect(blogUrl).data("datakey", "datavalue").post();
- docCustomConn = Jsoup.connect(blogUrl).header("headersecurity", "xyz123").get();
-
- docCustomConn = Jsoup.connect(blogUrl)
+ /**
+ * Loads the page at {@code blogUrl} into the {@code doc} field, configuring
+ * the connection before the GET request is issued: user agent "Mozilla",
+ * timeout 5000 (milliseconds according to the jsoup Connection API), the
+ * cookies "cookiename" and "anothercookie", the referrer
+ * "http://google.com" and a custom "headersecurity" header.
+ *
+ * @param blogUrl URL of the page to load
+ * @throws IOException if the request fails
+ */
+ void loadDocumentCustomized(String blogUrl) throws IOException {
+ doc = Jsoup.connect(blogUrl)
.userAgent("Mozilla")
.timeout(5000)
.cookie("cookiename", "val234")
.cookie("anothercookie", "ilovejsoup")
+ .referrer("http://google.com")
.header("headersecurity", "xyz123")
.get();
+ }
+ void examplesSelectors() {
Elements links = doc.select("a");
- Elements sections = doc.select("section");
Elements logo = doc.select(".spring-logo--container");
Elements pagination = doc.select("#pagination_control");
Elements divsDescendant = doc.select("header div");
@@ -50,6 +36,14 @@ public class JsoupExample {
Element pag = doc.getElementById("pagination_control");
Elements desktopOnly = doc.getElementsByClass("desktopOnly");
+ Elements sections = doc.select("section");
+ Element firstSection = sections.first();
+ Elements sectionParagraphs = firstSection.select(".paragraph");
+ }
+
+ void examplesTraversing() {
+ Elements sections = doc.select("section");
+
Element firstSection = sections.first();
Element lastSection = sections.last();
Element secondSection = sections.get(2);
@@ -59,9 +53,9 @@ public class JsoupExample {
Elements siblings = firstSection.siblingElements();
sections.stream().forEach(el -> System.out.println("section: " + el));
+ }
- Elements sectionParagraphs = firstSection.select(".paragraph");
-
+ void examplesExtracting() {
Element firstArticle = doc.select("article").first();
Element timeElement = firstArticle.select("time").first();
String dateTimeOfFirstArticle = timeElement.attr("datetime");
@@ -69,7 +63,14 @@ public class JsoupExample {
String sectionDivText = sectionDiv.text();
String articleHtml = firstArticle.html();
String outerHtml = firstArticle.outerHtml();
+ }
+ void examplesModifying() {
+ Element firstArticle = doc.select("article").first();
+ Element timeElement = firstArticle.select("time").first();
+ Element sectionDiv = firstArticle.select("section div").first();
+
+ String dateTimeOfFirstArticle = timeElement.attr("datetime");
timeElement.attr("datetime", "2016-12-16 15:19:54.3");
sectionDiv.text("foo bar");
firstArticle.select("h2").html("
");
@@ -82,8 +83,9 @@ public class JsoupExample {
doc.select("li.navbar-link").remove();
firstArticle.select("img").remove();
+ }
- File indexFile = new File("/tmp", "spring_blog_home.html");
- FileUtils.writeStringToFile(indexFile, doc.html(), doc.charset());
+ /**
+ * Returns the current document serialized as an HTML string via
+ * {@code doc.html()}.
+ *
+ * NOTE(review): in the visible code {@code doc} is only assigned by the load
+ * methods, so calling this before a document has been loaded would
+ * dereference null - confirm callers always load first.
+ *
+ * @return the HTML of the loaded document
+ */
+ String getTidyHtml() {
+ return doc.html();
}
}
diff --git a/jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java b/jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java
new file mode 100644
index 0000000000..85fd3c3459
--- /dev/null
+++ b/jsoup/src/test/java/com/baeldung/jsoup/JsoupParserTest.java
@@ -0,0 +1,53 @@
+package com.baeldung.jsoup;
+
+import java.io.IOException;
+import org.jsoup.HttpStatusException;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
+import org.junit.Before;
+import org.junit.Test;
+
+/**
+ * Tests for {@link JsoupParser}.
+ *
+ * <p>Both tests load pages from spring.io, so they need network access.
+ */
+public class JsoupParserTest {
+
+    JsoupParser jsoupParser;
+
+    /** Creates the parser under test before each test method. */
+    @Before
+    public void setUp() {
+        jsoupParser = new JsoupParser();
+    }
+
+    /**
+     * Loading a page that does not exist must raise an
+     * {@link HttpStatusException} carrying status code 404.
+     */
+    @Test
+    public void test404() throws IOException {
+        try {
+            jsoupParser.loadDocument("https://spring.io/will-not-be-found");
+            // Without this, the test would pass silently when no exception is thrown.
+            fail("Expected HttpStatusException for a non-existent page");
+        } catch (HttpStatusException ex) {
+            assertEquals(404, ex.getStatusCode());
+        }
+    }
+
+    /**
+     * After {@code examplesModifying()} runs on the blog page, the serialized
+     * HTML is expected to contain the string {@code http://baeldung.com}.
+     */
+    @Test
+    public void testChange() throws IOException {
+        jsoupParser.loadDocument("http://spring.io/blog");
+
+        jsoupParser.examplesModifying();
+
+        assertTrue(jsoupParser.getTidyHtml().contains("http://baeldung.com"));
+    }
+}