Add Google extended sitemap URL with support for adding multiple image tags to a single URL entry

This commit is contained in:
thorkarlsson
2018-10-30 14:04:45 +07:00
parent 47cc5e01d8
commit e9cabcf104
5 changed files with 461 additions and 1 deletions
@@ -0,0 +1,103 @@
package com.redfin.sitemapgenerator;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Builds an extended sitemap with support for Google extensions; the renderer currently
 * emits {@code <image:image>} tags. To configure options use {@link #builder(URL, File)}.
 *
 * @see <a href="https://support.google.com/webmasters/answer/183668">Manage your sitemaps</a>
 */
public class GoogleExtensionSitemapGenerator extends SitemapGenerator<GoogleExtensionSitemapUrl, GoogleExtensionSitemapGenerator> {

    /**
     * Creates a generator from pre-built options. Package-private; external callers go
     * through the public constructors or {@link #builder(URL, File)}.
     *
     * @param options the generator options to use
     */
    GoogleExtensionSitemapGenerator(AbstractSitemapGeneratorOptions<?> options) {
        super(options, new GoogleExtensionSitemapGenerator.Renderer());
    }

    /**
     * Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public GoogleExtensionSitemapGenerator(String baseUrl, File baseDir)
            throws MalformedURLException {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Configures the generator with a base URL and directory to write the sitemap files.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     */
    public GoogleExtensionSitemapGenerator(URL baseUrl, File baseDir) {
        this(new SitemapGeneratorOptions(baseUrl, baseDir));
    }

    /**
     * Configures the generator with a base URL and a null directory. The object constructed
     * is not intended to be used to write to files. Rather, it is intended to be used to obtain
     * XML-formatted strings that represent sitemaps.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public GoogleExtensionSitemapGenerator(String baseUrl) throws MalformedURLException {
        this(new SitemapGeneratorOptions(new URL(baseUrl)));
    }

    /**
     * Configures the generator with a base URL and a null directory. The object constructed
     * is not intended to be used to write to files. Rather, it is intended to be used to obtain
     * XML-formatted strings that represent sitemaps.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     */
    public GoogleExtensionSitemapGenerator(URL baseUrl) {
        this(new SitemapGeneratorOptions(baseUrl));
    }

    /**
     * Configures a builder so you can specify sitemap generator options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     */
    public static SitemapGeneratorBuilder<GoogleExtensionSitemapGenerator> builder(URL baseUrl, File baseDir) {
        return new SitemapGeneratorBuilder<GoogleExtensionSitemapGenerator>(baseUrl, baseDir, GoogleExtensionSitemapGenerator.class);
    }

    /**
     * Configures a builder so you can specify sitemap generator options.
     *
     * @param baseUrl All URLs in the generated sitemap(s) should appear under this base URL
     * @param baseDir Sitemap files will be generated in this directory as either "sitemap.xml" or "sitemap1.xml" "sitemap2.xml" and so on.
     * @return a builder; call .build() on it to make a sitemap generator
     * @throws MalformedURLException if {@code baseUrl} cannot be parsed as a URL
     */
    public static SitemapGeneratorBuilder<GoogleExtensionSitemapGenerator> builder(String baseUrl, File baseDir) throws MalformedURLException {
        return new SitemapGeneratorBuilder<GoogleExtensionSitemapGenerator>(baseUrl, baseDir, GoogleExtensionSitemapGenerator.class);
    }

    /** Renders a {@link GoogleExtensionSitemapUrl}, appending one image tag group per attached image. */
    private static class Renderer extends AbstractSitemapUrlRenderer<GoogleExtensionSitemapUrl> implements ISitemapUrlRenderer<GoogleExtensionSitemapUrl> {

        /** @return the URL class this renderer handles */
        public Class<GoogleExtensionSitemapUrl> getUrlClass() {
            return GoogleExtensionSitemapUrl.class;
        }

        /** @return the XML namespace declaration for the Google image sitemap extension */
        public String getXmlNamespaces() {
            return "xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\"";
        }

        /**
         * Builds the image tags for {@code url} and hands them to the base renderer as
         * additional content for the URL entry.
         *
         * @param url the sitemap URL whose images are rendered
         * @param sb the buffer the base renderer writes the URL entry to
         * @param dateFormat the date format passed through to the base renderer
         */
        public void render(GoogleExtensionSitemapUrl url, StringBuilder sb, W3CDateFormat dateFormat) {
            StringBuilder tagSb = new StringBuilder();
            // Guard against a null image list so such a URL renders without image tags
            // instead of failing with a NullPointerException.
            if (url.getImages() != null) {
                for (Image image : url.getImages()) {
                    tagSb.append(" <image:image>\n");
                    renderTag(tagSb, "image", "loc", image.getUrl());
                    renderTag(tagSb, "image", "caption", image.getCaption());
                    renderTag(tagSb, "image", "title", image.getTitle());
                    renderTag(tagSb, "image", "geo_location", image.getGeoLocation());
                    renderTag(tagSb, "image", "license", image.getLicense());
                    tagSb.append(" </image:image>\n");
                }
            }
            super.render(url, sb, dateFormat, tagSb.toString());
        }
    }
}
@@ -0,0 +1,74 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Configurable sitemap URL with support for Google extensions (image tags).
 *
 * <p>TODO Add support for video tags and news tags
 *
 * @see <a href="https://support.google.com/webmasters/answer/183668">Manage your sitemaps</a>
 */
public class GoogleExtensionSitemapUrl extends WebSitemapUrl {

    /** Maximum number of image tags a single URL entry may carry. */
    private static final int MAX_IMAGES = 1000;

    /** Message used whenever the image limit would be exceeded. */
    private static final String TOO_MANY_IMAGES =
            "A URL cannot have more than " + MAX_IMAGES + " image tags";

    private final List<Image> images;

    /**
     * Creates a URL entry with no images.
     *
     * @param url the page URL
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public GoogleExtensionSitemapUrl(String url) throws MalformedURLException {
        this(new Options(url));
    }

    /**
     * Creates a URL entry with no images.
     *
     * @param url the page URL
     */
    public GoogleExtensionSitemapUrl(URL url) {
        this(new Options(url));
    }

    /**
     * Creates a URL entry from the given options.
     *
     * @param options the configured options, including any images
     */
    public GoogleExtensionSitemapUrl(Options options) {
        super(options);
        // Take a private, growable copy: the options may be reused to build several URLs,
        // and addImage() must be able to append regardless of how the images were supplied.
        this.images = new ArrayList<Image>(options.images);
    }

    /**
     * Attaches another image to this URL entry.
     *
     * @param image the image to add
     * @throws RuntimeException if the entry already holds the maximum of 1000 images;
     *         the list is left unchanged in that case
     */
    public void addImage(Image image) {
        // Check before mutating so a rejected call does not leave an over-long list behind.
        if (this.images.size() >= MAX_IMAGES) {
            throw new RuntimeException(TOO_MANY_IMAGES);
        }
        this.images.add(image);
    }

    /** Options to configure Google Extension URLs */
    public static class Options extends AbstractSitemapUrlOptions<GoogleExtensionSitemapUrl, GoogleExtensionSitemapUrl.Options> {

        /** Never null; always a private copy owned by this Options instance. */
        private List<Image> images;

        /**
         * @param url the page URL
         */
        public Options(URL url) {
            super(url, GoogleExtensionSitemapUrl.class);
            images = new ArrayList<Image>();
        }

        /**
         * @param url the page URL
         * @throws MalformedURLException if {@code url} cannot be parsed as a URL
         */
        public Options(String url) throws MalformedURLException {
            super(url, GoogleExtensionSitemapUrl.class);
            images = new ArrayList<Image>();
        }

        /**
         * Replaces the images attached to the URL.
         *
         * @param images the images to attach; {@code null} is treated as "no images"
         * @return this options object, for chaining
         * @throws RuntimeException if more than 1000 images are supplied
         */
        public Options images(List<Image> images) {
            if (images != null && images.size() > MAX_IMAGES) {
                throw new RuntimeException(TOO_MANY_IMAGES);
            }
            // Copy so later changes to the caller's list cannot bypass the limit check,
            // and so a fixed-size list (e.g. from Arrays.asList) never becomes the backing list.
            this.images = (images == null) ? new ArrayList<Image>() : new ArrayList<Image>(images);
            return this;
        }

        /**
         * Replaces the images attached to the URL.
         *
         * @param images the images to attach; a {@code null} array is treated as "no images"
         * @return this options object, for chaining
         * @throws RuntimeException if more than 1000 images are supplied
         */
        public Options images(Image... images) {
            // The List overload performs the size check and the defensive copy.
            return images(images == null ? null : Arrays.asList(images));
        }
    }

    /**
     * Retrieves list of images.
     *
     * @return the list backing this URL entry (not a copy)
     */
    public List<Image> getImages() {
        return this.images;
    }
}
@@ -0,0 +1,100 @@
package com.redfin.sitemapgenerator;
import java.net.MalformedURLException;
import java.net.URL;
/**
 * Represents a single image and its properties for use in extended sitemaps.
 * Instances are immutable; use {@link ImageBuilder} to set the optional properties.
 *
 * @see <a href="https://support.google.com/webmasters/answer/178636">Image sitemaps</a>
 */
public class Image {
    private final URL url;
    private final String title;
    private final String caption;
    private final String geoLocation;
    private final URL license;

    /**
     * Creates an image with only a location.
     *
     * @param url the image location
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public Image(String url) throws MalformedURLException {
        this(new URL(url));
    }

    /**
     * Creates an image with only a location; all optional properties are {@code null}.
     *
     * @param url the image location
     */
    public Image(URL url) {
        this(url, null, null, null, (URL) null);
    }

    /**
     * Creates an image with all properties, taking the license as a string.
     *
     * @param url the image location
     * @param title the image title, or {@code null}
     * @param caption the image caption, or {@code null}
     * @param geoLocation the geographic location of the image, or {@code null}
     * @param license the URL of the image license, or {@code null} for no license
     * @throws MalformedURLException if {@code license} is non-null and cannot be parsed as a URL
     */
    public Image(URL url, String title, String caption, String geoLocation, String license) throws MalformedURLException {
        this(url, title, caption, geoLocation, toUrlOrNull(license));
    }

    /**
     * Creates an image with all properties.
     *
     * @param url the image location
     * @param title the image title, or {@code null}
     * @param caption the image caption, or {@code null}
     * @param geoLocation the geographic location of the image, or {@code null}
     * @param license the URL of the image license, or {@code null}
     */
    public Image(URL url, String title, String caption, String geoLocation, URL license) {
        this.url = url;
        this.title = title;
        this.caption = caption;
        this.geoLocation = geoLocation;
        this.license = license;
    }

    /**
     * Parses an optional URL string; the license is optional, so a missing value must
     * stay {@code null} rather than fail parsing.
     */
    private static URL toUrlOrNull(String spec) throws MalformedURLException {
        return spec == null ? null : new URL(spec);
    }

    /** Retrieves URL of image */
    public URL getUrl() { return url; }

    /** Retrieves title of image */
    public String getTitle() { return title; }

    /** Retrieves caption of image */
    public String getCaption() { return caption; }

    /** Retrieves geolocation string of image */
    public String getGeoLocation() { return geoLocation; }

    /** Retrieves license URL of image */
    public URL getLicense() { return license; }

    /** Fluent builder for {@link Image}; only the image location is mandatory. */
    public static class ImageBuilder {
        private final URL url;
        private String title;
        private String caption;
        private String geoLocation;
        private URL license;

        /**
         * @param url the image location
         * @throws MalformedURLException if {@code url} cannot be parsed as a URL
         */
        public ImageBuilder(String url) throws MalformedURLException {
            this(new URL(url));
        }

        /**
         * @param url the image location
         */
        public ImageBuilder(URL url) {
            this.url = url;
        }

        /** Sets the image title. */
        public ImageBuilder title(String title) {
            this.title = title;
            return this;
        }

        /** Sets the image caption. */
        public ImageBuilder caption(String caption) {
            this.caption = caption;
            return this;
        }

        /** Sets the geographic location of the image. */
        public ImageBuilder geoLocation(String geoLocation) {
            this.geoLocation = geoLocation;
            return this;
        }

        /**
         * Sets the license URL from a string.
         *
         * @param license the license URL, or {@code null} to clear it
         * @throws MalformedURLException if {@code license} is non-null and cannot be parsed as a URL
         */
        public ImageBuilder license(String license) throws MalformedURLException {
            return license(toUrlOrNull(license));
        }

        /** Sets the license URL. */
        public ImageBuilder license(URL license) {
            this.license = license;
            return this;
        }

        /** @return a new {@link Image} with the properties configured so far */
        public Image build() {
            return new Image(url, title, caption, geoLocation, license);
        }
    }
}
@@ -0,0 +1,170 @@
package com.redfin.sitemapgenerator;
import junit.framework.TestCase;
import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Tests rendering of {@link GoogleExtensionSitemapUrl} entries (URLs carrying image tags)
 * through {@link GoogleExtensionSitemapGenerator}.
 *
 * NOTE(review): the expected {@code <lastmod>} values below encode a +08:00 offset while the
 * generator is created without an explicit date format — presumably these tests only pass
 * when the default time zone is UTC+8; confirm.
 */
public class GoogleExtensionSitemapUrlTest extends TestCase {
    private static final URL LANDING_URL = newURL("http://www.example.com/index.html");

    File dir;
    GoogleExtensionSitemapGenerator wsg;

    /** Parses a URL literal, failing loudly instead of silently yielding null. */
    private static URL newURL(String url) {
        try {
            return new URL(url);
        } catch (MalformedURLException e) {
            throw new IllegalArgumentException("Invalid test URL: " + url, e);
        }
    }

    /** Creates a fresh temporary directory for the sitemap files of each test. */
    public void setUp() throws Exception {
        // createTempFile reserves a unique name; the file is then swapped for a directory.
        dir = File.createTempFile(GoogleExtensionSitemapUrlTest.class.getSimpleName(), "");
        dir.delete();
        dir.mkdir();
        dir.deleteOnExit();
    }

    /** Removes whatever the test left in the temporary directory, then the directory itself. */
    public void tearDown() {
        wsg = null;
        // listFiles() returns null if the directory is gone or unreadable.
        File[] leftovers = dir.listFiles();
        if (leftovers != null) {
            for (File file : leftovers) {
                file.deleteOnExit();
                file.delete();
            }
        }
        dir.delete();
        dir = null;
    }

    /** Images added one by one via addImage() are rendered with only their location. */
    public void testSimpleUrl() throws Exception {
        wsg = new GoogleExtensionSitemapGenerator("http://www.example.com", dir);
        GoogleExtensionSitemapUrl url = new GoogleExtensionSitemapUrl(LANDING_URL);
        url.addImage(new Image("http://cdn.example.com/image1.jpg"));
        url.addImage(new Image("http://cdn.example.com/image2.jpg"));
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\" >\n" +
                " <url>\n" +
                " <loc>http://www.example.com/index.html</loc>\n" +
                " <image:image>\n" +
                " <image:loc>http://cdn.example.com/image1.jpg</image:loc>\n" +
                " </image:image>\n" +
                " <image:image>\n" +
                " <image:loc>http://cdn.example.com/image2.jpg</image:loc>\n" +
                " </image:image>\n" +
                " </url>\n" +
                "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Standard URL options (priority, changefreq, lastmod) are rendered ahead of the image tags. */
    public void testBaseOptions() throws Exception {
        wsg = new GoogleExtensionSitemapGenerator("http://www.example.com", dir);
        GoogleExtensionSitemapUrl url = new GoogleExtensionSitemapUrl.Options(LANDING_URL)
                .images(new Image("http://cdn.example.com/image1.jpg"), new Image("http://cdn.example.com/image2.jpg"))
                .priority(0.5)
                .changeFreq(ChangeFreq.WEEKLY)
                .lastMod(new Date(0))
                .build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\" >\n" +
                " <url>\n" +
                " <loc>http://www.example.com/index.html</loc>\n" +
                " <lastmod>1970-01-01T08:00+08:00</lastmod>\n" +
                " <changefreq>weekly</changefreq>\n" +
                " <priority>0.5</priority>\n" +
                " <image:image>\n" +
                " <image:loc>http://cdn.example.com/image1.jpg</image:loc>\n" +
                " </image:image>\n" +
                " <image:image>\n" +
                " <image:loc>http://cdn.example.com/image2.jpg</image:loc>\n" +
                " </image:image>\n" +
                " </url>\n" +
                "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Every optional image property set through ImageBuilder shows up as its own tag. */
    public void testImageOptions() throws Exception {
        wsg = new GoogleExtensionSitemapGenerator("http://www.example.com", dir);
        GoogleExtensionSitemapUrl url = new GoogleExtensionSitemapUrl.Options(LANDING_URL)
                .images(new Image.ImageBuilder("http://cdn.example.com/image1.jpg")
                                .title("image1.jpg")
                                .caption("An image of the number 1")
                                .geoLocation("Pyongyang, North Korea")
                                .license("http://cdn.example.com/licenses/imagelicense.txt")
                                .build(),
                        new Image.ImageBuilder("http://cdn.example.com/image2.jpg")
                                .title("image2.jpg")
                                .caption("An image of the number 2")
                                .geoLocation("Pyongyang, North Korea")
                                .license("http://cdn.example.com/licenses/imagelicense.txt")
                                .build())
                .priority(0.5)
                .changeFreq(ChangeFreq.WEEKLY)
                .lastMod(new Date(0))
                .build();
        wsg.addUrl(url);
        String expected = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                "<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\" xmlns:image=\"http://www.google.com/schemas/sitemap-image/1.1\" >\n" +
                " <url>\n" +
                " <loc>http://www.example.com/index.html</loc>\n" +
                " <lastmod>1970-01-01T08:00+08:00</lastmod>\n" +
                " <changefreq>weekly</changefreq>\n" +
                " <priority>0.5</priority>\n" +
                " <image:image>\n" +
                " <image:loc>http://cdn.example.com/image1.jpg</image:loc>\n" +
                " <image:caption>An image of the number 1</image:caption>\n" +
                " <image:title>image1.jpg</image:title>\n" +
                " <image:geo_location>Pyongyang, North Korea</image:geo_location>\n" +
                " <image:license>http://cdn.example.com/licenses/imagelicense.txt</image:license>\n" +
                " </image:image>\n" +
                " <image:image>\n" +
                " <image:loc>http://cdn.example.com/image2.jpg</image:loc>\n" +
                " <image:caption>An image of the number 2</image:caption>\n" +
                " <image:title>image2.jpg</image:title>\n" +
                " <image:geo_location>Pyongyang, North Korea</image:geo_location>\n" +
                " <image:license>http://cdn.example.com/licenses/imagelicense.txt</image:license>\n" +
                " </image:image>\n" +
                " </url>\n" +
                "</urlset>";
        String sitemap = writeSingleSiteMap(wsg);
        assertEquals(expected, sitemap);
    }

    /** Supplying 1001 images to the options must be rejected with a RuntimeException. */
    public void testTooManyImages() throws Exception {
        wsg = new GoogleExtensionSitemapGenerator("http://www.example.com", dir);
        List<Image> images = new ArrayList<Image>();
        for (int i = 0; i <= 1000; i++) {
            images.add(new Image("http://cdn.example.com/image" + i + ".jpg"));
        }
        try {
            new GoogleExtensionSitemapUrl.Options(LANDING_URL)
                    .images(images)
                    .priority(0.5)
                    .changeFreq(ChangeFreq.WEEKLY)
                    .lastMod(new Date(0))
                    .build();
            fail("Too many images allowed");
        } catch (RuntimeException expected) {
            // expected: the image limit was enforced
        }
    }

    /** Writes the sitemap, checks exactly one file named sitemap.xml was produced, and returns its content. */
    private String writeSingleSiteMap(GoogleExtensionSitemapGenerator wsg) {
        List<File> files = wsg.write();
        assertEquals("Too many files: " + files.toString(), 1, files.size());
        assertEquals("Sitemap misnamed", "sitemap.xml", files.get(0).getName());
        return TestUtil.slurpFileAndDelete(files.get(0));
    }
}
@@ -154,7 +154,20 @@ public class SitemapGeneratorTest extends TestCase {
String sitemap = writeSingleSiteMap(wsg);
assertEquals(expected, sitemap);
}
/**
 * Writes a single-URL sitemap with auto-validation enabled and a Zulu-time date format,
 * then prints the result.
 *
 * NOTE(review): despite its name this test attaches no images and makes no assertions on
 * the output — it looks like a smoke test relying on auto-validation; confirm intent.
 */
public void testUrlWithImages() throws Exception {
    W3CDateFormat zuluFormat = new W3CDateFormat();
    zuluFormat.setTimeZone(W3CDateFormat.ZULU);
    wsg = WebSitemapGenerator.builder("http://www.example.com", dir)
            .dateFormat(zuluFormat)
            .autoValidate(true)
            .build();
    wsg.addUrl(new WebSitemapUrl.Options("http://www.example.com").build());
    System.out.println(writeSingleSiteMap(wsg));
}
public void testBadUrl() throws Exception {
wsg = new WebSitemapGenerator("http://www.example.com", dir);
try {