Merge branch 'master' into bael-16656

This commit is contained in:
Josh Cummings
2019-10-26 15:37:05 -06:00
committed by GitHub
parent db85c8f275
commit 0be2175c89
20539 changed files with 1643630 additions and 0 deletions
+10
View File
@@ -0,0 +1,10 @@
## jsoup
This module contains articles about jsoup.
### Relevant Articles:
- [Parsing HTML in Java with Jsoup](https://www.baeldung.com/java-with-jsoup)
### Build the Project
```
mvn clean install
```
+27
View File
@@ -0,0 +1,27 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor for the jsoup example module (packaged as a jar). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>jsoup</artifactId>
<name>jsoup</name>
<packaging>jar</packaging>
<!-- No groupId/version is declared for this module itself; per Maven's inheritance
     rules they are expected to come from this parent. NOTE(review): the parent POM is
     not visible here, so shared test dependencies (e.g. JUnit) are presumably
     provided by it as well - confirm. -->
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<dependencies>
<!-- The jsoup HTML parser; its version is resolved from the jsoup.version property below. -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
</dependencies>
<properties>
<!-- Single place to bump the jsoup release used by this module. -->
<jsoup.version>1.10.2</jsoup.version>
</properties>
</project>
@@ -0,0 +1,122 @@
package com.baeldung.jsoup;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;
import org.junit.Before;
import org.junit.Test;
import java.io.IOException;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
/**
 * Integration tests that walk through the jsoup API against the live
 * {@code https://spring.io/blog} page.
 *
 * <p>These tests perform real HTTP requests, so they need network access and
 * depend on the current markup of the remote site. Most {@code examples*}
 * methods are API showcases: they deliberately keep otherwise-unused local
 * variables to illustrate each call and only fail if jsoup throws.
 */
public class JsoupParserIntegrationTest {

    /** Page under test; re-fetched before every test method by {@link #setUp()}. */
    private Document doc;

    /**
     * Downloads and parses the blog page used by the example tests.
     *
     * @throws IOException if the page cannot be fetched
     */
    @Before
    public void setUp() throws IOException {
        doc = Jsoup.connect("https://spring.io/blog")
            .get();
    }

    /**
     * Verifies that requesting a non-existent page surfaces an
     * {@link HttpStatusException} carrying status code 404.
     *
     * @throws IOException if the request fails for a reason other than an HTTP error status
     */
    @Test
    public void loadDocument404() throws IOException {
        try {
            Jsoup.connect("https://spring.io/will-not-be-found")
                .get();
        } catch (HttpStatusException ex) {
            assertEquals(404, ex.getStatusCode());
            return;
        }
        // Without this, a successful response would leave the test green
        // even though the behavior under test never happened.
        throw new AssertionError("Expected an HttpStatusException with status 404, but the request succeeded");
    }

    /**
     * Shows how to customize the connection (user agent, timeout, cookies,
     * referrer and an extra header) before fetching the document.
     *
     * @throws IOException if the page cannot be fetched
     */
    @Test
    public void loadDocumentCustomized() throws IOException {
        doc = Jsoup.connect("https://spring.io/blog")
            .userAgent("Mozilla")
            .timeout(5000)
            .cookie("cookiename", "val234")
            .cookie("anothercookie", "ilovejsoup")
            .referrer("http://google.com")
            .header("headersecurity", "xyz123")
            .get();
    }

    /**
     * Showcases element lookup: CSS selectors by tag, class and id,
     * descendant vs. direct-child combinators, the DOM-style getters, and
     * running a selector relative to an already selected element.
     */
    @Test
    public void examplesSelectors() {
        Elements links = doc.select("a");
        Elements logo = doc.select(".spring-logo--container");
        Elements pagination = doc.select("#pagination_control");
        Elements divsDescendant = doc.select("header div");
        Elements divsDirect = doc.select("header > div");

        Element pag = doc.getElementById("pagination_control");
        Elements desktopOnly = doc.getElementsByClass("desktopOnly");

        Elements sections = doc.select("section");
        Element firstSection = sections.first();
        Elements sectionParagraphs = firstSection.select(".paragraph");
    }

    /**
     * Showcases navigation from a selected element: first/last/indexed access
     * within a result list, ancestors, direct parent, children and siblings.
     */
    @Test
    public void examplesTraversing() {
        Elements sections = doc.select("section");

        Element firstSection = sections.first();
        Element lastSection = sections.last();
        // Elements is a zero-based list, so the second match lives at index 1.
        Element secondSection = sections.get(1);
        Elements allParents = firstSection.parents();
        Element parent = firstSection.parent();
        Elements children = firstSection.children();
        Elements siblings = firstSection.siblingElements();

        sections.forEach(el -> System.out.println("section: " + el));
    }

    /**
     * Showcases reading data out of elements: attribute values, text content,
     * inner HTML and outer HTML.
     */
    @Test
    public void examplesExtracting() {
        Element firstArticle = doc.select("article")
            .first();
        Element timeElement = firstArticle.select("time")
            .first();
        String dateTimeOfFirstArticle = timeElement.attr("datetime");
        Element sectionDiv = firstArticle.select("section div")
            .first();
        String sectionDivText = sectionDiv.text();
        String articleHtml = firstArticle.html();
        String outerHtml = firstArticle.outerHtml();
    }

    /**
     * Showcases in-memory modification of the parsed document (attributes,
     * text, inner HTML, appending and removing nodes) and asserts that the
     * appended link is present in the serialized HTML.
     */
    @Test
    public void examplesModifying() {
        Element firstArticle = doc.select("article")
            .first();
        Element timeElement = firstArticle.select("time")
            .first();
        Element sectionDiv = firstArticle.select("section div")
            .first();

        // Read the current value first, then overwrite it.
        String dateTimeOfFirstArticle = timeElement.attr("datetime");
        timeElement.attr("datetime", "2016-12-16 15:19:54.3");
        sectionDiv.text("foo bar");
        firstArticle.select("h2")
            .html("<div><span></span></div>");

        Element link = new Element(Tag.valueOf("a"), "").text("Checkout this amazing website!")
            .attr("href", "http://baeldung.com")
            .attr("target", "_blank");
        firstArticle.appendChild(link);

        doc.select("li.navbar-link")
            .remove();
        firstArticle.select("img")
            .remove();

        assertTrue(doc.html()
            .contains("http://baeldung.com"));
    }
}