-
Notifications
You must be signed in to change notification settings - Fork 793
Lucene 9 #4867
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Lucene 9 #4867
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -48,7 +48,7 @@ | |
| import org.apache.lucene.search.IndexSearcher; | ||
| import org.apache.lucene.search.Query; | ||
| import org.apache.lucene.search.ScoreDoc; | ||
| import org.apache.lucene.search.TopScoreDocCollector; | ||
| import org.apache.lucene.search.TopScoreDocCollectorManager; | ||
| import org.apache.lucene.util.Version; | ||
| import org.opengrok.indexer.analysis.AbstractAnalyzer; | ||
| import org.opengrok.indexer.analysis.CompatibleAnalyser; | ||
|
|
@@ -132,7 +132,7 @@ public class SearchEngine { | |
| int cachePages = RuntimeEnvironment.getInstance().getCachePages(); | ||
| int totalHits = 0; | ||
| private ScoreDoc[] hits; | ||
| private TopScoreDocCollector collector; | ||
| private TopScoreDocCollectorManager collectorManager; | ||
| private IndexSearcher searcher; | ||
| boolean allCollected; | ||
| private final ArrayList<SuperIndexSearcher> searcherList = new ArrayList<>(); | ||
|
|
@@ -205,18 +205,17 @@ private void searchMultiDatabase(List<Project> projectList, boolean paging) thro | |
| } | ||
|
|
||
| private void searchIndex(IndexSearcher searcher, boolean paging) throws IOException { | ||
| collector = TopScoreDocCollector.create(hitsPerPage * cachePages, Short.MAX_VALUE); | ||
| collectorManager = new TopScoreDocCollectorManager(hitsPerPage * cachePages, Short.MAX_VALUE); | ||
| Statistics stat = new Statistics(); | ||
| searcher.search(query, collector); | ||
| totalHits = collector.getTotalHits(); | ||
| hits = searcher.search(query, collectorManager).scoreDocs; | ||
| totalHits = searcher.count(query); | ||
| stat.report(LOGGER, Level.FINEST, "search via SearchEngine done", | ||
| "search.latency", new String[]{"category", "engine", | ||
| "outcome", totalHits > 0 ? "success" : "empty"}); | ||
| if (!paging && totalHits > 0) { | ||
| collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE); | ||
| searcher.search(query, collector); | ||
| collectorManager = new TopScoreDocCollectorManager(totalHits, Short.MAX_VALUE); | ||
| hits = searcher.search(query, collectorManager).scoreDocs; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Since the new version is compelling this slightly intrusive revision in order to use […]. Also, that way we could make sure […] |
||
| } | ||
| hits = collector.topDocs().scoreDocs; | ||
| StoredFields storedFields = searcher.storedFields(); | ||
| for (ScoreDoc hit : hits) { | ||
| int docId = hit.doc; | ||
|
|
@@ -412,14 +411,13 @@ public void results(int start, int end, List<Hit> ret) { | |
| // TODO check if below fits for if end=old hits.length, or it should include it | ||
| if (end > hits.length && !allCollected) { | ||
| //do the requery, we want more than 5 pages | ||
| collector = TopScoreDocCollector.create(totalHits, Short.MAX_VALUE); | ||
| collectorManager = new TopScoreDocCollectorManager(totalHits, Short.MAX_VALUE); | ||
| try { | ||
| searcher.search(query, collector); | ||
| hits = searcher.search(query, collectorManager).scoreDocs; | ||
| } catch (Exception e) { // this exception should never be hit, since search() will hit this before | ||
| LOGGER.log( | ||
| Level.WARNING, SEARCH_EXCEPTION_MSG, e); | ||
| } | ||
| hits = collector.topDocs().scoreDocs; | ||
| StoredFields storedFields = null; | ||
| try { | ||
| storedFields = searcher.storedFields(); | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -60,7 +60,7 @@ Portions Copyright (c) 2018, 2020, Chris Fraire <[email protected]>. | |
| </scm> | ||
|
|
||
| <properties> | ||
| <lucene.version>9.9.2</lucene.version> | ||
| <lucene.version>9.12.3</lucene.version> | ||
| <mavenjavadocplugin.version>3.6.0</mavenjavadocplugin.version> | ||
| <java.version>17</java.version> | ||
| <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| import org.apache.lucene.index.StoredFields; | ||
| import org.apache.lucene.search.CollectionTerminatedException; | ||
| import org.apache.lucene.search.Collector; | ||
| import org.apache.lucene.search.CollectorManager; | ||
| import org.apache.lucene.search.IndexSearcher; | ||
| import org.apache.lucene.search.LeafCollector; | ||
| import org.apache.lucene.search.Scorable; | ||
|
|
@@ -35,6 +36,7 @@ | |
| import org.opengrok.suggest.query.data.BitIntsHolder; | ||
|
|
||
| import java.io.IOException; | ||
| import java.util.Collection; | ||
|
|
||
| /** | ||
| * Collects Suggester query results. | ||
|
|
@@ -62,6 +64,29 @@ public LeafCollector getLeafCollector(LeafReaderContext context) throws IOExcept | |
| return new SuggesterLeafCollector(context); | ||
| } | ||
|
|
||
| /** | ||
| * Creates a {@link CollectorManager} that can concurrently collect matching docs in a {@link | ||
| * BitIntsHolder}. | ||
| */ | ||
| public static CollectorManager<SuggestResultCollector, BitIntsHolder> createManager(LeafReaderContext leafReaderContext, ComplexQueryData data, | ||
| BitIntsHolder documentIds) { | ||
| return new CollectorManager<>() { | ||
| @Override | ||
| public SuggestResultCollector newCollector() { | ||
| return new SuggestResultCollector(leafReaderContext, data, documentIds); | ||
| } | ||
|
|
||
| @Override | ||
| public BitIntsHolder reduce(Collection<SuggestResultCollector> collectors) { | ||
| BitIntsHolder reduced = documentIds; | ||
| for (SuggestResultCollector collector : collectors) { | ||
| documentIds.or(collector.documentIds); //TODO fix as per https://github.com/apache/lucene/pull/766/files | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can this be addressed in this PR? I don't like adding TODOs in the code. |
||
| } | ||
| return reduced; | ||
| } | ||
| }; | ||
| } | ||
|
|
||
| /** | ||
| * Indicates what features are required from the scorer. | ||
| */ | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Formerly, `totalHits` would have had its accuracy bounded by the `Short.MAX_VALUE` value of `totalHitsThreshold` (on the `TopScoreDocCollector`). For very large indexes, is there any possible performance penalty in getting the entire, "fully accurate" `int` value of `totalHits`?