From 0919fb25b3ea0fb78c3670f1c5beb02ee9a6a60b Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 26 Jul 2017 15:42:59 +0200 Subject: [PATCH 1/2] Caching a MinDocQuery can lead to wrong results. Queries are supposed to be cacheable per segment, yet matches of this query also depend on how many documents exist on previous segments. --- .../apache/lucene/queries/MinDocQuery.java | 28 +++++++++++++++++-- .../lucene/queries/MinDocQueryTests.java | 15 ++++++++++ 2 files changed, 41 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java b/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java index 65c5c0f707c5b..7e857441de745 100644 --- a/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java +++ b/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java @@ -19,6 +19,7 @@ package org.apache.lucene.queries; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.ConstantScoreScorer; import org.apache.lucene.search.ConstantScoreWeight; @@ -35,16 +36,26 @@ * to a configured doc ID. */ public final class MinDocQuery extends Query { + // Matching documents depend on the sequence of segments that the index reader + // wraps. Yes matches must be cacheable per-segment, so we need to incorporate + // the reader id in the identity of the query so that a cache entry may only + // be reused if this query is run against the same index reader. + private final Object readerId; private final int minDoc; /** Sole constructor. */ public MinDocQuery(int minDoc) { + this(minDoc, null); + } + + MinDocQuery(int minDoc, Object readerId) { this.minDoc = minDoc; + this.readerId = readerId; } @Override public int hashCode() { - return Objects.hash(classHash(), minDoc); + return Objects.hash(classHash(), minDoc, readerId); } @Override @@ -53,11 +64,24 @@ public boolean equals(Object obj) { return false; } MinDocQuery that = (MinDocQuery) obj; - return minDoc == that.minDoc; + return minDoc == that.minDoc && Objects.equals(readerId, that.readerId); + } + + @Override + public Query rewrite(IndexReader reader) throws IOException { + if (Objects.equals(reader.getContext().id(), readerId) == false) { + return new MinDocQuery(minDoc, reader.getContext().id()); + } + return this; } @Override public Weight createWeight(IndexSearcher searcher, boolean needsScores, float boost) throws IOException { + if (readerId == null) { + throw new IllegalStateException("Rewrite first"); + } else if (Objects.equals(searcher.getIndexReader().getContext().id(), readerId) == false) { + throw new IllegalStateException("Executing against a different reader than the query has been rewritten against"); + } return new ConstantScoreWeight(this, boost) { @Override public Scorer scorer(LeafReaderContext context) throws IOException { diff --git a/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java b/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java index 3b9671d195a4f..b5bddcaebfe7e 100644 --- a/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java +++ b/core/src/test/java/org/apache/lucene/queries/MinDocQueryTests.java @@ -21,8 +21,10 @@ import org.apache.lucene.document.Document; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryUtils; import org.apache.lucene.store.Directory; import org.elasticsearch.test.ESTestCase; @@ -38,6 +40,19 @@ public void testBasics() { QueryUtils.check(query1); QueryUtils.checkEqual(query1, query2); QueryUtils.checkUnequal(query1, query3); + + MinDocQuery query4 = new MinDocQuery(42, new Object()); + MinDocQuery query5 = new MinDocQuery(42, new Object()); + QueryUtils.checkUnequal(query4, query5); + } + + public void testRewrite() throws Exception { + IndexReader reader = new MultiReader(); + MinDocQuery query = new MinDocQuery(42); + Query rewritten = query.rewrite(reader); + QueryUtils.checkUnequal(query, rewritten); + Query rewritten2 = rewritten.rewrite(reader); + assertSame(rewritten, rewritten2); } public void testRandom() throws IOException { From a4db07287d598c1cc5f5004fd10383c923085af4 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Thu, 27 Jul 2017 11:18:38 +0200 Subject: [PATCH 2/2] iter --- core/src/main/java/org/apache/lucene/queries/MinDocQuery.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java b/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java index 7e857441de745..e882e72742904 100644 --- a/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java +++ b/core/src/main/java/org/apache/lucene/queries/MinDocQuery.java @@ -37,7 +37,7 @@ public final class MinDocQuery extends Query { // Matching documents depend on the sequence of segments that the index reader - // wraps. Yes matches must be cacheable per-segment, so we need to incorporate + // wraps. Yet matches must be cacheable per-segment, so we need to incorporate // the reader id in the identity of the query so that a cache entry may only // be reused if this query is run against the same index reader. private final Object readerId;