This commit is contained in:
Jonathan Cook
2019-10-23 15:01:44 +02:00
parent db85c8f275
commit 684ec0d2e3
20486 changed files with 1642483 additions and 0 deletions
@@ -0,0 +1,79 @@
package com.baeldung.tika;
import static org.hamcrest.CoreMatchers.containsString;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import java.io.IOException;
import java.io.InputStream;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.junit.Test;
import org.xml.sax.SAXException;
public class TikaUnitTest {
@Test
public void whenUsingDetector_thenDocumentTypeIsReturned() throws IOException {
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.txt");
String mediaType = TikaAnalysis.detectDocTypeUsingDetector(stream);
assertEquals("application/pdf", mediaType);
stream.close();
}
@Test
public void whenUsingFacade_thenDocumentTypeIsReturned() throws IOException {
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.txt");
String mediaType = TikaAnalysis.detectDocTypeUsingFacade(stream);
assertEquals("application/pdf", mediaType);
stream.close();
}
@Test
public void whenUsingParser_thenContentIsReturned() throws IOException, TikaException, SAXException {
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.docx");
String content = TikaAnalysis.extractContentUsingParser(stream);
assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
assertThat(content, containsString("detects and extracts metadata and text"));
stream.close();
}
@Test
public void whenUsingFacade_thenContentIsReturned() throws IOException, TikaException {
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.docx");
String content = TikaAnalysis.extractContentUsingFacade(stream);
assertThat(content, containsString("Apache Tika - a content analysis toolkit"));
assertThat(content, containsString("detects and extracts metadata and text"));
stream.close();
}
@Test
public void whenUsingParser_thenMetadataIsReturned() throws IOException, TikaException, SAXException {
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.xlsx");
Metadata metadata = TikaAnalysis.extractMetadatatUsingParser(stream);
assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
assertEquals("Microsoft Office User", metadata.get("Author"));
stream.close();
}
@Test
public void whenUsingFacade_thenMetadataIsReturned() throws IOException, TikaException {
InputStream stream = this.getClass().getClassLoader().getResourceAsStream("tika.xlsx");
Metadata metadata = TikaAnalysis.extractMetadatatUsingFacade(stream);
assertEquals("org.apache.tika.parser.DefaultParser", metadata.get("X-Parsed-By"));
assertEquals("Microsoft Office User", metadata.get("Author"));
stream.close();
}
}