diff --git a/libraries/OpenNLP/PartOfSpeechTag.txt b/libraries/OpenNLP/PartOfSpeechTag.txt
new file mode 100644
index 0000000000..fdd8238ec4
--- /dev/null
+++ b/libraries/OpenNLP/PartOfSpeechTag.txt
@@ -0,0 +1 @@
+Out of the night that covers me
\ No newline at end of file
diff --git a/libraries/OpenNLP/doc-cat.train b/libraries/OpenNLP/doc-cat.train
new file mode 100644
index 0000000000..c457221ec6
--- /dev/null
+++ b/libraries/OpenNLP/doc-cat.train
@@ -0,0 +1,10 @@
+GOOD good morning /
+GOOD good evening /
+GOOD have a good day /
+GOOD nice party! /
+GOOD fine pants /
+BAD nightmare volcano in the sea /
+BAD darkest sky /
+BAD greed and waste /
+BAD army attacks /
+BAD bomb explodes /
\ No newline at end of file
diff --git a/libraries/OpenNLP/en-chunker.bin b/libraries/OpenNLP/en-chunker.bin
new file mode 100644
index 0000000000..65d9356888
Binary files /dev/null and b/libraries/OpenNLP/en-chunker.bin differ
diff --git a/libraries/OpenNLP/en-ner-location.bin b/libraries/OpenNLP/en-ner-location.bin
new file mode 100644
index 0000000000..f3788bc1f6
Binary files /dev/null and b/libraries/OpenNLP/en-ner-location.bin differ
diff --git a/libraries/OpenNLP/en-ner-person.bin b/libraries/OpenNLP/en-ner-person.bin
new file mode 100644
index 0000000000..2f68318203
Binary files /dev/null and b/libraries/OpenNLP/en-ner-person.bin differ
diff --git a/libraries/OpenNLP/en-pos-maxent.bin b/libraries/OpenNLP/en-pos-maxent.bin
new file mode 100644
index 0000000000..c8cae23c5f
Binary files /dev/null and b/libraries/OpenNLP/en-pos-maxent.bin differ
diff --git a/libraries/OpenNLP/en-sent.bin b/libraries/OpenNLP/en-sent.bin
new file mode 100644
index 0000000000..e89076be5a
Binary files /dev/null and b/libraries/OpenNLP/en-sent.bin differ
diff --git a/libraries/OpenNLP/en-token.bin b/libraries/OpenNLP/en-token.bin
new file mode 100644
index 0000000000..c417277ca7
Binary files /dev/null and b/libraries/OpenNLP/en-token.bin differ
diff --git a/libraries/pom.xml b/libraries/pom.xml
index 653f21d1f6..bc40514b2f 100644
--- a/libraries/pom.xml
+++ b/libraries/pom.xml
@@ -323,6 +323,13 @@
netty-all
${netty.version}
+
+        <dependency>
+            <groupId>org.apache.opennlp</groupId>
+            <artifactId>opennlp-tools</artifactId>
+            <version>1.8.0</version>
+        </dependency>
+
0.7.0
@@ -350,4 +357,4 @@
4.1.10.Final
-
+
\ No newline at end of file
diff --git a/libraries/src/main/java/com/baeldung/opennlp/OpenNLP.java b/libraries/src/main/java/com/baeldung/opennlp/OpenNLP.java
new file mode 100644
index 0000000000..b2fa8e629b
--- /dev/null
+++ b/libraries/src/main/java/com/baeldung/opennlp/OpenNLP.java
@@ -0,0 +1,166 @@
+package com.baeldung.opennlp;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Arrays;
+import java.util.logging.Logger;
+
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.doccat.DoccatFactory;
+import opennlp.tools.doccat.DoccatModel;
+import opennlp.tools.doccat.DocumentCategorizerME;
+import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.doccat.DocumentSampleStream;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.InvalidFormatException;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.TrainingParameters;
+
+public class OpenNLP {
+
+ private static final Logger LOGGER = Logger.getLogger(OpenNLP.class.getName());
+ // Sample inputs: a raw passage (sentence detection, tokenizing) and a pre-tokenized sentence.
+ private static final String text = "To get to the south: Go to the store. Buy a compass. Use the compass. Then walk to the south.";
+ private static final String[] sentence = { "James", "Jordan", "live", "in", "Oklahoma", "city", "." };
+ private DoccatModel docCatModel; // set by trainDocumentCategorizer(), read by documentCategorizer()
+
+ public static void main(String[] args) {
+ new OpenNLP(); // every demo step is invoked from the constructor
+ }
+
+ /** Runs each demo step in order; an IOException escaping a step is logged and ends the run. */
+ public OpenNLP() {
+     try {
+         sentenceDetector();
+         tokenizer();
+         nameFinder();
+         locationFinder();
+         trainDocumentCategorizer();
+         documentCategorizer();
+         partOfSpeechTagger();
+         chunker();
+     } catch (IOException e) {
+         // InvalidFormatException is an IOException subclass, so this one handler covers both.
+         LOGGER.log(java.util.logging.Level.SEVERE, "OpenNLP demo aborted", e);
+     }
+ }
+
+ /** Splits the sample text into sentences with the en-sent.bin model and logs each sentence. */
+ public void sentenceDetector() throws InvalidFormatException, IOException {
+     // try-with-resources releases the model file even if loading or detection throws
+     try (InputStream is = new FileInputStream("OpenNLP/en-sent.bin")) {
+         SentenceDetectorME sdetector = new SentenceDetectorME(new SentenceModel(is));
+         String[] sentences = sdetector.sentDetect(text);
+         Arrays.stream(sentences).forEach(LOGGER::info);
+     }
+ }
+
+ /** Tokenizes the sample text with the en-token.bin model and logs each token. */
+ public void tokenizer() throws InvalidFormatException, IOException {
+     // try-with-resources releases the model file even if loading or tokenizing throws
+     try (InputStream is = new FileInputStream("OpenNLP/en-token.bin")) {
+         Tokenizer tokenizer = new TokenizerME(new TokenizerModel(is));
+         Arrays.stream(tokenizer.tokenize(text)).forEach(LOGGER::info);
+     }
+ }
+
+ /** Finds person names in the sample tokens with en-ner-person.bin and logs each match. */
+ public static void nameFinder() throws IOException {
+     TokenNameFinderModel model;
+     try (InputStream is = new FileInputStream("OpenNLP/en-ner-person.bin")) {
+         model = new TokenNameFinderModel(is); // stream is closed even when loading throws
+     }
+     Span[] nameSpans = new NameFinderME(model).find(sentence);
+     Arrays.stream(Span.spansToStrings(nameSpans, sentence)).forEach(LOGGER::info);
+ }
+
+ /** Finds location names in the sample tokens with en-ner-location.bin and logs each match. */
+ public static void locationFinder() throws IOException {
+     TokenNameFinderModel model;
+     try (InputStream is = new FileInputStream("OpenNLP/en-ner-location.bin")) {
+         model = new TokenNameFinderModel(is); // stream is closed even when loading throws
+     }
+     Span[] locationSpans = new NameFinderME(model).find(sentence);
+     Arrays.stream(Span.spansToStrings(locationSpans, sentence)).forEach(LOGGER::info);
+ }
+
+ /**
+  * Trains a document categorizer from OpenNLP/doc-cat.train and stores it in docCatModel.
+  * An IOException is logged rather than thrown, leaving docCatModel at its previous value.
+  */
+ public void trainDocumentCategorizer() {
+     InputStreamFactory isf = () -> new FileInputStream("OpenNLP/doc-cat.train");
+     // Both streams are try-with-resources entries so they get closed however training ends;
+     // the type arguments replace the raw ObjectStream declarations.
+     try (ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8");
+             ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream)) {
+         docCatModel = DocumentCategorizerME.train("en", sampleStream,
+                 TrainingParameters.defaultParams(), new DoccatFactory());
+     } catch (IOException e) {
+         LOGGER.log(java.util.logging.Level.SEVERE, "Could not train document categorizer", e);
+     }
+ }
+
+ /**
+  * Categorizes the sample tokens and logs the verdict; throws IllegalStateException if untrained.
+  */
+ public void documentCategorizer() {
+     if (docCatModel == null) { // trainDocumentCategorizer() has not produced a model
+         throw new IllegalStateException("Document categorizer model has not been trained");
+     }
+     DocumentCategorizerME myCategorizer = new DocumentCategorizerME(docCatModel);
+     String category = myCategorizer.getBestCategory(myCategorizer.categorize(sentence));
+     LOGGER.info("GOOD".equalsIgnoreCase(category) ? "Document is positive :) " : "Document is negative :( ");
+ }
+
+ /**
+  * POS-tags every line of OpenNLP/PartOfSpeechTag.txt with en-pos-maxent.bin and logs the
+  * result. IOExceptions are caught and logged here, so none escapes despite the throws clause.
+  */
+ public static void partOfSpeechTagger() throws IOException {
+     InputStreamFactory isf = () -> new FileInputStream("OpenNLP/PartOfSpeechTag.txt");
+     // Declared as ObjectStream<String>: with the raw type, read() yields Object and the
+     // assignment to the String variable below does not compile.
+     try (InputStream modelIn = new FileInputStream("OpenNLP/en-pos-maxent.bin");
+             ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8")) {
+         // Load via the POSModel constructor instead of the cmdline-package POSModelLoader.
+         POSTaggerME posTaggerME = new POSTaggerME(new POSModel(modelIn));
+         String line;
+         while ((line = lineStream.read()) != null) {
+             String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
+             String[] tags = posTaggerME.tag(tokens);
+             LOGGER.info(new POSSample(tokens, tags).toString());
+         }
+     } catch (IOException e) {
+         LOGGER.log(java.util.logging.Level.SEVERE, "Part-of-speech tagging failed", e);
+     }
+ }
+
+ /** Chunks a fixed, pre-tagged sentence with en-chunker.bin and logs the resulting chunk tags. */
+ public static void chunker() throws IOException {
+     String[] taggedSentence = { "Out", "of", "the", "night", "that", "covers", "me" };
+     String[] pos = { "IN", "IN", "DT", "NN", "WDT", "VBZ", "PRP" };
+     try (InputStream is = new FileInputStream("OpenNLP/en-chunker.bin")) { // closed on every path
+         ChunkerME chunkerME = new ChunkerME(new ChunkerModel(is));
+         Arrays.stream(chunkerME.chunk(taggedSentence, pos)).forEach(LOGGER::info);
+     }
+ }
+
+}
diff --git a/libraries/src/test/java/com/baeldung/opennlp/OpenNLPTests.java b/libraries/src/test/java/com/baeldung/opennlp/OpenNLPTests.java
new file mode 100644
index 0000000000..a38791fd61
--- /dev/null
+++ b/libraries/src/test/java/com/baeldung/opennlp/OpenNLPTests.java
@@ -0,0 +1,158 @@
+package com.baeldung.opennlp;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.junit.Test;
+
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.cmdline.postag.POSModelLoader;
+import opennlp.tools.doccat.DoccatFactory;
+import opennlp.tools.doccat.DoccatModel;
+import opennlp.tools.doccat.DocumentCategorizerME;
+import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.doccat.DocumentSampleStream;
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.WhitespaceTokenizer;
+import opennlp.tools.util.InputStreamFactory;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.PlainTextByLineStream;
+import opennlp.tools.util.Span;
+import opennlp.tools.util.TrainingParameters;
+
+public class OpenNLPTests {
+
+ private static final String text = "To get to the south: Go to the store. Buy a compass. Use the compass. Then walk to the south."; // raw passage for the sentence-detection test
+ private static final String[] sentence = { "James", "Jordan", "live", "in", "Oklahoma", "city", "." }; // pre-tokenized input for the name, location, categorizer and chunker tests
+
+ /**
+  * Expects the sentence model to split the sample text into four sentences.
+  *
+  * @throws IOException if the model cannot be read; propagating it fails the test
+  */
+ @Test
+ public void givenText_WhenDetectSentences_ThenCountSentences() throws IOException {
+     // IOException is declared, not caught: a missing or unreadable model must fail the
+     // test rather than pass silently. try-with-resources closes the stream even when
+     // the assertion throws.
+     try (InputStream is = new FileInputStream("OpenNLP/en-sent.bin")) {
+         SentenceModel model = new SentenceModel(is);
+         SentenceDetectorME sdetector = new SentenceDetectorME(model);
+         String[] sentences = sdetector.sentDetect(text);
+         assertEquals(4, sentences.length);
+     }
+ }
+
+ /**
+  * Expects the person model to find exactly one name, "James Jordan", in the sample tokens.
+  *
+  * @throws IOException if the model cannot be read; propagating it fails the test
+  */
+ @Test
+ public void givenText_WhenDetectTokens_ThenVerifyNames() throws IOException {
+     TokenNameFinderModel model;
+     // IOException is declared, not caught: a missing or unreadable model must fail the
+     // test rather than pass silently.
+     try (InputStream is = new FileInputStream("OpenNLP/en-ner-person.bin")) {
+         model = new TokenNameFinderModel(is);
+     }
+     NameFinderME nameFinder = new NameFinderME(model);
+     Span[] nameSpans = nameFinder.find(sentence);
+     String[] names = Span.spansToStrings(nameSpans, sentence);
+     assertEquals(1, names.length);
+     assertEquals("James Jordan", names[0]);
+ }
+
+ /**
+  * Expects the location model to find exactly one location, "Oklahoma", in the sample tokens.
+  *
+  * @throws IOException if the model cannot be read; propagating it fails the test
+  */
+ @Test
+ public void givenText_WhenDetectTokens_ThenVerifyLocations() throws IOException {
+     TokenNameFinderModel model;
+     // IOException is declared, not caught: a missing or unreadable model must fail the
+     // test rather than pass silently.
+     try (InputStream is = new FileInputStream("OpenNLP/en-ner-location.bin")) {
+         model = new TokenNameFinderModel(is);
+     }
+     NameFinderME nameFinder = new NameFinderME(model);
+     Span[] locationSpans = nameFinder.find(sentence);
+     String[] locations = Span.spansToStrings(locationSpans, sentence);
+     assertEquals(1, locations.length);
+     assertEquals("Oklahoma", locations[0]);
+ }
+
+ /**
+  * Trains a categorizer on OpenNLP/doc-cat.train and expects GOOD for the sample tokens.
+  *
+  * @throws IOException if the training data cannot be read; propagating it fails the test
+  */
+ @Test
+ public void givenText_WhenCategorizeDocument_ThenVerifyDocumentContent() throws IOException {
+     InputStreamFactory isf = () -> new FileInputStream("OpenNLP/doc-cat.train");
+     DoccatModel docCatModel;
+     // Typed streams replace the raw ObjectStream declarations, and both are closed once
+     // training finishes or fails.
+     try (ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8");
+             ObjectStream<DocumentSample> sampleStream = new DocumentSampleStream(lineStream)) {
+         docCatModel = DocumentCategorizerME.train("en", sampleStream,
+                 TrainingParameters.defaultParams(), new DoccatFactory());
+     }
+     DocumentCategorizerME myCategorizer = new DocumentCategorizerME(docCatModel);
+     double[] outcomes = myCategorizer.categorize(sentence);
+     String category = myCategorizer.getBestCategory(outcomes);
+     assertEquals("GOOD", category);
+ }
+
+ /**
+  * POS-tags each line of OpenNLP/PartOfSpeechTag.txt and compares the rendered sample.
+  *
+  * @throws IOException if the model or text file cannot be read; propagating it fails the test
+  */
+ @Test
+ public void givenText_WhenTagDocument_ThenVerifyTaggedString() throws IOException {
+     InputStreamFactory isf = () -> new FileInputStream("OpenNLP/PartOfSpeechTag.txt");
+     // Declared as ObjectStream<String>: with the raw type, read() yields Object and the
+     // assignment to the String variable below does not compile.
+     try (InputStream modelIn = new FileInputStream("OpenNLP/en-pos-maxent.bin");
+             ObjectStream<String> lineStream = new PlainTextByLineStream(isf, "UTF-8")) {
+         // Load via the POSModel constructor instead of the cmdline-package POSModelLoader.
+         POSTaggerME posTaggerME = new POSTaggerME(new POSModel(modelIn));
+         String line;
+         while ((line = lineStream.read()) != null) {
+             String[] tokens = WhitespaceTokenizer.INSTANCE.tokenize(line);
+             String[] tags = posTaggerME.tag(tokens);
+             POSSample posSample = new POSSample(tokens, tags);
+             assertEquals("Out_IN of_IN the_DT night_NN that_WDT covers_VBZ me_PRP", posSample.toString());
+         }
+     }
+ }
+
+ /** Expects one chunk tag for each of the seven sample tokens. */
+ @Test
+ public void givenText_WhenChunked_ThenCountChunks() throws IOException {
+     // POS tags aligned with the tokens: James Jordan live in Oklahoma city .
+     String[] pos = { "NNP", "NNP", "VBP", "IN", "NNP", "NN", "." };
+     // IOException is declared, not caught, so a missing model fails the test; the stream
+     // is closed on every path.
+     try (InputStream is = new FileInputStream("OpenNLP/en-chunker.bin")) {
+         ChunkerME chunkerME = new ChunkerME(new ChunkerModel(is));
+         String[] chunks = chunkerME.chunk(sentence, pos);
+         assertEquals(7, chunks.length);
+     }
+ }
+
+}