This commit is contained in:
Jonathan Cook
2019-10-23 15:01:44 +02:00
parent db85c8f275
commit 684ec0d2e3
20486 changed files with 1642483 additions and 0 deletions
@@ -0,0 +1,128 @@
package com.baeldung.lucene;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
public class InMemoryLuceneIndex {
private Directory memoryIndex;
private Analyzer analyzer;
public InMemoryLuceneIndex(Directory memoryIndex, Analyzer analyzer) {
super();
this.memoryIndex = memoryIndex;
this.analyzer = analyzer;
}
/**
*
* @param title
* @param body
*/
public void indexDocument(String title, String body) {
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
try {
IndexWriter writter = new IndexWriter(memoryIndex, indexWriterConfig);
Document document = new Document();
document.add(new TextField("title", title, Field.Store.YES));
document.add(new TextField("body", body, Field.Store.YES));
document.add(new SortedDocValuesField("title", new BytesRef(title)));
writter.addDocument(document);
writter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public List<Document> searchIndex(String inField, String queryString) {
try {
Query query = new QueryParser(inField, analyzer).parse(queryString);
IndexReader indexReader = DirectoryReader.open(memoryIndex);
IndexSearcher searcher = new IndexSearcher(indexReader);
TopDocs topDocs = searcher.search(query, 10);
List<Document> documents = new ArrayList<>();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
documents.add(searcher.doc(scoreDoc.doc));
}
return documents;
} catch (IOException | ParseException e) {
e.printStackTrace();
}
return null;
}
public void deleteDocument(Term term) {
try {
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
IndexWriter writter = new IndexWriter(memoryIndex, indexWriterConfig);
writter.deleteDocuments(term);
writter.close();
} catch (IOException e) {
e.printStackTrace();
}
}
public List<Document> searchIndex(Query query) {
try {
IndexReader indexReader = DirectoryReader.open(memoryIndex);
IndexSearcher searcher = new IndexSearcher(indexReader);
TopDocs topDocs = searcher.search(query, 10);
List<Document> documents = new ArrayList<>();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
documents.add(searcher.doc(scoreDoc.doc));
}
return documents;
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
public List<Document> searchIndex(Query query, Sort sort) {
try {
IndexReader indexReader = DirectoryReader.open(memoryIndex);
IndexSearcher searcher = new IndexSearcher(indexReader);
TopDocs topDocs = searcher.search(query, 10, sort);
List<Document> documents = new ArrayList<>();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
documents.add(searcher.doc(scoreDoc.doc));
}
return documents;
} catch (IOException e) {
e.printStackTrace();
}
return null;
}
}
@@ -0,0 +1,80 @@
package com.baeldung.lucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
/**
 * Indexes text files found on the classpath into a Lucene {@link Directory}
 * and searches them.
 *
 * <p>Each call opens and closes its own writer or reader, so no Lucene
 * resource or file handle is left open after a call returns or throws.
 */
public class LuceneFileSearch {

    /** Maximum number of hits returned by {@link #searchFiles(String, String)}. */
    private static final int MAX_HITS = 10;

    private final Directory indexDirectory;
    private final StandardAnalyzer analyzer;

    /**
     * @param fsDirectory directory that holds the index
     * @param analyzer    analyzer used both for indexing and for query parsing
     */
    public LuceneFileSearch(Directory fsDirectory, StandardAnalyzer analyzer) {
        this.indexDirectory = fsDirectory;
        this.analyzer = analyzer;
    }

    /**
     * Indexes one classpath resource. The file content goes into the analyzed
     * (not stored) {@code contents} field; the file path and file name are
     * stored verbatim in {@code path} and {@code filename}.
     *
     * @param filepath resource name, resolved through this class's class loader
     * @throws IOException        if the resource does not exist, cannot be read,
     *                            or the index cannot be written
     * @throws URISyntaxException if the resource URL is not a valid URI
     */
    public void addFileToIndex(String filepath) throws IOException, URISyntaxException {
        java.net.URL resource = getClass().getClassLoader().getResource(filepath);
        if (resource == null) {
            // Previously this surfaced as an opaque NullPointerException.
            throw new java.io.FileNotFoundException("Classpath resource not found: " + filepath);
        }
        Path path = Paths.get(resource.toURI());
        File file = path.toFile();

        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        // Both the writer (which holds the index write lock) and the file handle
        // must be released even when indexing fails.
        // NOTE: FileReader decodes with the platform default charset.
        try (IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
             FileReader fileReader = new FileReader(file)) {
            Document document = new Document();
            document.add(new TextField("contents", fileReader));
            document.add(new StringField("path", file.getPath(), Field.Store.YES));
            document.add(new StringField("filename", file.getName(), Field.Store.YES));
            indexWriter.addDocument(document);
        }
    }

    /**
     * Parses {@code queryString} with the classic query parser and returns the
     * top {@value #MAX_HITS} matching documents.
     *
     * @param inField     default field the query is parsed against
     * @param queryString query in classic Lucene query syntax
     * @return the matching documents, or {@code null} if the query could not be
     *         parsed or the index could not be read
     */
    public List<Document> searchFiles(String inField, String queryString) {
        try {
            Query query = new QueryParser(inField, analyzer).parse(queryString);
            try (IndexReader indexReader = DirectoryReader.open(indexDirectory)) {
                IndexSearcher searcher = new IndexSearcher(indexReader);
                TopDocs topDocs = searcher.search(query, MAX_HITS);
                List<Document> documents = new ArrayList<>();
                // Stored fields must be loaded before the reader is closed.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    documents.add(searcher.doc(scoreDoc.doc));
                }
                return documents;
            }
        } catch (IOException | ParseException e) {
            e.printStackTrace();
        }
        return null;
    }
}
@@ -0,0 +1,26 @@
package com.baeldung.lucene;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.miscellaneous.CapitalizationFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
/**
 * Analyzer that tokenizes with {@link StandardTokenizer} and then passes the
 * tokens through, in order: {@link StandardFilter}, {@link LowerCaseFilter},
 * {@link StopFilter} (using {@link StandardAnalyzer#STOP_WORDS_SET}),
 * {@link PorterStemFilter} and finally {@link CapitalizationFilter}.
 */
public class MyCustomAnalyzer extends Analyzer {

    /**
     * Builds the tokenizer and filter chain; the same chain is used for every
     * field, so {@code fieldName} is not consulted.
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        final StandardTokenizer tokenizer = new StandardTokenizer();

        // Each stage wraps the previous one; the order here is the processing order.
        final TokenStream lowerCased = new LowerCaseFilter(new StandardFilter(tokenizer));
        final TokenStream withoutStopWords = new StopFilter(lowerCased, StandardAnalyzer.STOP_WORDS_SET);
        final TokenStream stemmed = new PorterStemFilter(withoutStopWords);
        final TokenStream capitalized = new CapitalizationFilter(stemmed);

        return new TokenStreamComponents(tokenizer, capitalized);
    }
}
+13
View File
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback configuration: every logger writes to the console at INFO level and above. -->
<configuration>
<!-- Console appender named STDOUT. -->
<appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<!-- Layout: time, thread name, level (padded to 5 chars), logger name (abbreviated to about 36 chars), message, newline. -->
<pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
</pattern>
</encoder>
</appender>
<!-- Root logger: INFO threshold, output routed to the STDOUT appender above. -->
<root level="INFO">
<appender-ref ref="STDOUT" />
</root>
</configuration>