-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathLuceneNGram.java
More file actions
49 lines (42 loc) · 1.87 KB
/
LuceneNGram.java
File metadata and controls
49 lines (42 loc) · 1.87 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package examples;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Example program that prints the character n-grams of a stored document field.
 *
 * <p>Opens an existing Lucene index, reads the stored {@code docno} field of the first
 * document(s), and prints every gram produced by an {@link NGramTokenizer} configured with
 * gram sizes {@value #MIN_GRAM} to {@value #MAX_GRAM}, one gram per line on standard output.
 */
public class LuceneNGram {

    /** Index location used when no path is given on the command line. */
    private static final String DEFAULT_INDEX_PATH = "C://LuceneData/example_index_lucene";

    /** Name of the stored field whose value is split into n-grams. */
    private static final String DOCNO_FIELD = "docno";

    /** Smallest gram size passed to the tokenizer. */
    private static final int MIN_GRAM = 2;

    /** Largest gram size passed to the tokenizer. */
    private static final int MAX_GRAM = 3;

    /** Upper bound on the number of documents processed; the example only inspects the first. */
    private static final int MAX_DOCS = 1;

    /**
     * Prints the n-grams of the {@code docno} field for the first document(s) of the index.
     *
     * @param args optional; {@code args[0]}, when present, overrides the default index path
     * @throws IOException if the index cannot be opened or read, or tokenization fails
     */
    public static void main(String[] args) throws IOException {
        String pathIndex = (args != null && args.length > 0) ? args[0] : DEFAULT_INDEX_PATH;

        // Only the field that is actually tokenized is loaded from the stored document.
        Set<String> fieldset = new HashSet<>();
        fieldset.add(DOCNO_FIELD);

        // try-with-resources closes the reader first, then the directory, even on failure.
        try (Directory dir = FSDirectory.open(new File(pathIndex).toPath());
                IndexReader index = DirectoryReader.open(dir)) {
            // One tokenizer is reused for every document; its attribute is registered once.
            NGramTokenizer gramTokenizer = new NGramTokenizer(MIN_GRAM, MAX_GRAM);
            CharTermAttribute charTermAttribute =
                    gramTokenizer.addAttribute(CharTermAttribute.class);

            int limit = Math.min(index.maxDoc(), MAX_DOCS);
            for (int docid = 0; docid < limit; docid++) {
                Document doc = index.document(docid, fieldset);
                String docno = doc.get(DOCNO_FIELD);
                if (docno == null) {
                    // Document has no stored docno value; nothing to tokenize.
                    continue;
                }
                printNGrams(gramTokenizer, charTermAttribute, docno);
            }
        }
    }

    /**
     * Runs {@code text} through {@code tokenizer} and prints each token on its own line.
     *
     * <p>Follows the setReader / reset / incrementToken / end / close sequence and always
     * closes the tokenizer, so it can be given a new reader for the next call.
     *
     * @param tokenizer the tokenizer to feed
     * @param term the term attribute previously registered on {@code tokenizer}
     * @param text the text to split into n-grams
     * @throws IOException if the tokenizer fails while consuming the text
     */
    private static void printNGrams(NGramTokenizer tokenizer, CharTermAttribute term, String text)
            throws IOException {
        tokenizer.setReader(new StringReader(text));
        try {
            tokenizer.reset();
            while (tokenizer.incrementToken()) {
                System.out.println(term.toString());
            }
            tokenizer.end();
        } finally {
            tokenizer.close();
        }
    }
}