diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 24b210c29d584..8f1bb2a9310d3 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -143,7 +143,7 @@ static Result analyze(Query query, Version indexVersion) { } private static BiFunction matchNoDocsQuery() { - return (query, version) -> new Result(true, Collections.emptySet(), 1); + return (query, version) -> new Result(true, Collections.emptySet(), 0); } private static BiFunction matchAllDocsQuery() { @@ -179,28 +179,28 @@ private static BiFunction termInSetQuery() { for (BytesRef term = iterator.next(); term != null; term = iterator.next()) { terms.add(new QueryExtraction(new Term(iterator.field(), term))); } - return new Result(true, terms, 1); + return new Result(true, terms, Math.min(1, terms.size())); }; } private static BiFunction synonymQuery() { return (query, version) -> { Set terms = ((SynonymQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); - return new Result(true, terms, 1); + return new Result(true, terms, Math.min(1, terms.size())); }; } private static BiFunction commonTermsQuery() { return (query, version) -> { Set terms = ((CommonTermsQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); - return new Result(false, terms, 1); + return new Result(false, terms, Math.min(1, terms.size())); }; } private static BiFunction blendedTermQuery() { return (query, version) -> { Set terms = ((BlendedTermQuery) query).getTerms().stream().map(QueryExtraction::new).collect(toSet()); - return new Result(true, terms, 1); + return new Result(true, terms, Math.min(1, terms.size())); }; } @@ -208,7 +208,7 @@ private static BiFunction phraseQuery() { return (query, version) -> { Term[] terms = ((PhraseQuery) query).getTerms(); if (terms.length == 0) { - return new Result(true, Collections.emptySet(), 1); + return new Result(true, Collections.emptySet(), 0); } if (version.onOrAfter(Version.V_6_1_0)) { @@ -232,7 +232,7 @@ private static BiFunction multiPhraseQuery() { return (query, version) -> { Term[][] terms = ((MultiPhraseQuery) query).getTermArrays(); if (terms.length == 0) { - return new Result(true, Collections.emptySet(), 1); + return new Result(true, Collections.emptySet(), 0); } if (version.onOrAfter(Version.V_6_1_0)) { @@ -297,7 +297,7 @@ private static BiFunction spanOrQuery() { for (SpanQuery clause : spanOrQuery.getClauses()) { terms.addAll(analyze(clause, version).extractions); } - return new Result(false, terms, 1); + return new Result(false, terms, Math.min(1, terms.size())); }; } @@ -334,6 +334,9 @@ private static BiFunction booleanQuery() { numOptionalClauses++; } } + if (minimumShouldMatch > numOptionalClauses) { + return new Result(false, Collections.emptySet(), 0); + } if (numRequiredClauses > 0) { if (version.onOrAfter(Version.V_6_1_0)) { UnsupportedQueryException uqe = null; @@ -345,7 +348,12 @@ private static BiFunction booleanQuery() { // since they are completely optional. try { - results.add(analyze(clause.getQuery(), version)); + Result subResult = analyze(clause.getQuery(), version); + if (subResult.matchAllDocs == false && subResult.extractions.isEmpty()) { + // doesn't match anything + return subResult; + } + results.add(subResult); } catch (UnsupportedQueryException e) { uqe = e; } @@ -400,7 +408,11 @@ private static BiFunction booleanQuery() { } msm += resultMsm; - verified &= result.verified; + if (result.verified == false + // If some inner extractions are optional, the result can't be verified + || result.minimumShouldMatch < result.extractions.size()) { + verified = false; + } matchAllDocs &= result.matchAllDocs; extractions.addAll(result.extractions); } @@ -492,7 +504,7 @@ private static BiFunction pointRangeQuery() { // Need to check whether upper is not smaller than lower, otherwise NumericUtils.subtract(...) fails IAE // If upper is really smaller than lower then we deal with like MatchNoDocsQuery. (verified and no extractions) if (new BytesRef(lowerPoint).compareTo(new BytesRef(upperPoint)) > 0) { - return new Result(true, Collections.emptySet(), 1); + return new Result(true, Collections.emptySet(), 0); } byte[] interval = new byte[16]; @@ -537,7 +549,15 @@ private static Result handleDisjunction(List disjunctions, int requiredSh for (int i = 0; i < disjunctions.size(); i++) { Query disjunct = disjunctions.get(i); Result subResult = analyze(disjunct, version); - verified &= subResult.verified; + if (subResult.verified == false + // one of the sub queries requires more than one term to match, we can't + // verify it with a single top-level min_should_match + || subResult.minimumShouldMatch > 1 + // One of the inner clauses has multiple extractions, we won't be able to + // verify it with a single top-level min_should_match + || (subResult.extractions.size() > 1 && requiredShouldClauses > 1)) { + verified = false; + } if (subResult.matchAllDocs) { numMatchAllClauses++; } @@ -683,6 +703,10 @@ static class Result { final boolean matchAllDocs; Result(boolean verified, Set extractions, int minimumShouldMatch) { + if (minimumShouldMatch > extractions.size()) { + throw new IllegalArgumentException("minimumShouldMatch can't be greater than the number of extractions: " + + minimumShouldMatch + " > " + extractions.size()); + } this.extractions = extractions; this.verified = verified; this.minimumShouldMatch = minimumShouldMatch; diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java index 59f4e091140ea..27d72b2926749 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/CandidateQueryTests.java @@ -210,12 +210,13 @@ public void testDuel() throws Exception { new BytesRef(randomFrom(stringContent.get(field1))))); queryFunctions.add(() -> new TermInSetQuery(field2, new BytesRef(randomFrom(stringContent.get(field1))), new BytesRef(randomFrom(stringContent.get(field1))))); - int numRandomBoolQueries = randomIntBetween(16, 32); + // many iterations with boolean queries, which are the most complex queries to deal with when nested + int numRandomBoolQueries = 1000; for (int i = 0; i < numRandomBoolQueries; i++) { queryFunctions.add(() -> createRandomBooleanQuery(1, stringFields, stringContent, intFieldType, intValues)); } queryFunctions.add(() -> { - int numClauses = randomIntBetween(1, 16); + int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); List clauses = new ArrayList<>(); for (int i = 0; i < numClauses; i++) { String field = randomFrom(stringFields); @@ -266,7 +267,7 @@ public void testDuel() throws Exception { private BooleanQuery createRandomBooleanQuery(int depth, List fields, Map> content, MappedFieldType intFieldType, List intValues) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); - int numClauses = randomIntBetween(1, 16); + int numClauses = randomIntBetween(1, 1 << randomIntBetween(2, 4)); // use low numbers of clauses more often int numShouldClauses = 0; boolean onlyShouldClauses = rarely(); for (int i = 0; i < numClauses; i++) { @@ -313,7 +314,7 @@ private BooleanQuery createRandomBooleanQuery(int depth, List fields, Ma numShouldClauses++; } } - builder.setMinimumNumberShouldMatch(numShouldClauses); + builder.setMinimumNumberShouldMatch(randomIntBetween(0, numShouldClauses)); return builder.build(); } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 5968f8c3f8327..7bcdcd2e1f695 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -44,6 +44,7 @@ import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.join.QueryBitSetProducer; import org.apache.lucene.search.join.ScoreMode; import org.apache.lucene.search.spans.SpanFirstQuery; @@ -227,23 +228,87 @@ public void testExtractQueryMetadata_booleanQuery_pre6dot1() { public void testExtractQueryMetadata_booleanQuery_msm() { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(2); - TermQuery termQuery1 = new TermQuery(new Term("_field", "_term1")); + Term term1 = new Term("_field", "_term1"); + TermQuery termQuery1 = new TermQuery(term1); builder.add(termQuery1, BooleanClause.Occur.SHOULD); - TermQuery termQuery2 = new TermQuery(new Term("_field", "_term2")); + Term term2 = new Term("_field", "_term2"); + TermQuery termQuery2 = new TermQuery(term2); builder.add(termQuery2, BooleanClause.Occur.SHOULD); - TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3")); + Term term3 = new Term("_field", "_term3"); + TermQuery termQuery3 = new TermQuery(term3); builder.add(termQuery3, BooleanClause.Occur.SHOULD); BooleanQuery booleanQuery = builder.build(); Result result = analyze(booleanQuery, Version.CURRENT); assertThat(result.verified, is(true)); assertThat(result.minimumShouldMatch, equalTo(2)); - List extractions = new ArrayList<>(result.extractions); - extractions.sort(Comparator.comparing(extraction -> extraction.term)); - assertThat(extractions.size(), equalTo(3)); - assertThat(extractions.get(0).term, equalTo(new Term("_field", "_term1"))); - assertThat(extractions.get(1).term, equalTo(new Term("_field", "_term2"))); - assertThat(extractions.get(2).term, equalTo(new Term("_field", "_term3"))); + assertTermsEqual(result.extractions, term1, term2, term3); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD) + .setMinimumNumberShouldMatch(2); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertTermsEqual(result.extractions, term1, term2, term3); + + Term term4 = new Term("_field", "_term4"); + TermQuery termQuery4 = new TermQuery(term4); + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.MUST) + .add(termQuery2, Occur.FILTER) + .build(), Occur.SHOULD) + .add(new BooleanQuery.Builder() + .add(termQuery3, Occur.MUST) + .add(termQuery4, Occur.FILTER) + .build(), Occur.SHOULD); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertTermsEqual(result.extractions, term1, term2, term3, term4); + + Term term5 = new Term("_field", "_term5"); + TermQuery termQuery5 = new TermQuery(term5); + builder.add(termQuery5, Occur.SHOULD); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertTermsEqual(result.extractions, term1, term2, term3, term4, term5); + + builder.setMinimumNumberShouldMatch(2); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertTermsEqual(result.extractions, term1, term2, term3, term4, term5); + + builder.setMinimumNumberShouldMatch(3); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + assertThat(result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(5)); + assertTermsEqual(result.extractions, term1, term2, term3, term4, term5); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.SHOULD) + .add(new BooleanQuery.Builder().setMinimumNumberShouldMatch(1).build(), Occur.SHOULD) + .setMinimumNumberShouldMatch(2); + booleanQuery = builder.build(); + result = analyze(booleanQuery, Version.CURRENT); + // ideally it would return no extractions, but the fact + // that it doesn't consider them verified is probably good enough + assertFalse(result.verified); } public void testExtractQueryMetadata_booleanQuery_msm_pre6dot1() { @@ -353,7 +418,7 @@ public void testExactMatch_booleanQuery() { assertThat(result.minimumShouldMatch, equalTo(1)); builder = new BooleanQuery.Builder(); - builder.setMinimumNumberShouldMatch(randomIntBetween(2, 32)); + builder.setMinimumNumberShouldMatch(randomIntBetween(1, 2)); builder.add(termQuery1, BooleanClause.Occur.SHOULD); builder.add(termQuery2, BooleanClause.Occur.SHOULD); result = analyze(builder.build(), Version.CURRENT); @@ -379,6 +444,54 @@ public void testExactMatch_booleanQuery() { result = analyze(builder.build(), Version.CURRENT); assertThat("Prohibited clause, so candidate matches are not verified", result.verified, is(false)); assertThat(result.minimumShouldMatch, equalTo(1)); + + builder = new BooleanQuery.Builder(); + builder.add(termQuery1, randomBoolean() ? BooleanClause.Occur.MUST : BooleanClause.Occur.FILTER); + builder.add(termQuery2, BooleanClause.Occur.MUST_NOT); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Prohibited clause, so candidate matches are not verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + + TermQuery termQuery3 = new TermQuery(new Term("_field", "_term3")); + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.FILTER) + .add(termQuery2, Occur.FILTER) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Inner clause that is not a pure disjunction, so candidate matches are not verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Inner clause that is a pure disjunction, so candidate matches are verified", result.verified, is(true)); + assertThat(result.minimumShouldMatch, equalTo(1)); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.SHOULD) + .add(termQuery2, Occur.SHOULD) + .build(), Occur.MUST) + .add(termQuery3, Occur.FILTER); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Disjunctions of conjunctions can't be verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(2)); + + builder = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(termQuery1, Occur.MUST) + .add(termQuery2, Occur.FILTER) + .build(), Occur.SHOULD) + .add(termQuery3, Occur.SHOULD); + result = analyze(builder.build(), Version.CURRENT); + assertThat("Conjunctions of disjunctions can't be verified", result.verified, is(false)); + assertThat(result.minimumShouldMatch, equalTo(1)); } public void testBooleanQueryWithMustAndShouldClauses() { @@ -564,16 +677,15 @@ public void testExtractQueryMetadata_matchNoDocsQuery() { Result result = analyze(new MatchNoDocsQuery("sometimes there is no reason at all"), Version.CURRENT); assertThat(result.verified, is(true)); assertEquals(0, result.extractions.size()); - assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.minimumShouldMatch, equalTo(0)); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.MUST); bq.add(new MatchNoDocsQuery("sometimes there is no reason at all"), BooleanClause.Occur.MUST); result = analyze(bq.build(), Version.CURRENT); assertThat(result.verified, is(true)); - assertEquals(1, result.extractions.size()); - assertThat(result.minimumShouldMatch, equalTo(2)); - assertTermsEqual(result.extractions, new Term("field", "value")); + assertEquals(0, result.extractions.size()); + assertThat(result.minimumShouldMatch, equalTo(0)); bq = new BooleanQuery.Builder(); bq.add(new TermQuery(new Term("field", "value")), BooleanClause.Occur.SHOULD); @@ -785,7 +897,7 @@ public void testSynonymQuery() { SynonymQuery query = new SynonymQuery(); Result result = analyze(query, Version.CURRENT); assertThat(result.verified, is(true)); - assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.minimumShouldMatch, equalTo(0)); assertThat(result.extractions.isEmpty(), is(true)); query = new SynonymQuery(new Term("_field", "_value1"), new Term("_field", "_value2")); @@ -997,7 +1109,7 @@ public void testPointRangeQuery_lowerUpperReversed() { Query query = IntPoint.newRangeQuery("_field", 20, 10); Result result = analyze(query, Version.CURRENT); assertTrue(result.verified); - assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.minimumShouldMatch, equalTo(0)); assertThat(result.extractions.size(), equalTo(0)); } @@ -1179,7 +1291,7 @@ public void testExtractQueryMetadata_duplicatedClauses() { BooleanClause.Occur.SHOULD ); result = analyze(builder.build(), Version.CURRENT); - assertThat(result.verified, is(true)); + assertThat(result.verified, is(false)); assertThat(result.matchAllDocs, is(false)); assertThat(result.minimumShouldMatch, equalTo(2)); assertTermsEqual(result.extractions, new Term("field", "value1"), new Term("field", "value2"),