diff --git a/docs/reference/query-dsl/span-multi-term-query.asciidoc b/docs/reference/query-dsl/span-multi-term-query.asciidoc index ff7af83451be1..40bd15532984e 100644 --- a/docs/reference/query-dsl/span-multi-term-query.asciidoc +++ b/docs/reference/query-dsl/span-multi-term-query.asciidoc @@ -37,10 +37,9 @@ GET /_search -------------------------------------------------- // CONSOLE -WARNING: By default `span_multi queries are rewritten to a `span_or` query -containing **all** the expanded terms. This can be expensive if the number of expanded -terms is large. To avoid an unbounded expansion you can set the -<<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*` -rewrite. Or, if you use `span_multi` on `prefix` query only, you can -activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will -rewrite any prefix query on the field to a a single term query that matches the indexed prefix. \ No newline at end of file +WARNING: `span_multi` queries will fail with a too-many-clauses error if the number of terms that match the query exceeds the +boolean query limit (defaults to 1024). To avoid an unbounded expansion you can set the <<query-dsl-multi-term-rewrite,rewrite method>> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only, +you can activate the <<index-prefix-config,`index_prefixes`>> field option of the `text` field instead. This will +rewrite any prefix query on the field to a single term query that matches the indexed prefix. 
+ diff --git a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java index b574cadc423b4..0576a68a85b90 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java @@ -19,6 +19,9 @@ package org.elasticsearch.index.query; import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MultiTermQuery; @@ -26,11 +29,15 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.ScoringRewrite; +import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; import org.elasticsearch.Version; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; @@ -42,6 +49,8 @@ import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Objects; /** @@ -49,12 +58,10 @@ * as a {@link SpanQueryBuilder} so it can be nested. 
*/ public class SpanMultiTermQueryBuilder extends AbstractQueryBuilder - implements SpanQueryBuilder { + implements SpanQueryBuilder { public static final String NAME = "span_multi"; - private static final ParseField MATCH_FIELD = new ParseField("match"); - private final MultiTermQueryBuilder multiTermQueryBuilder; public SpanMultiTermQueryBuilder(MultiTermQueryBuilder multiTermQueryBuilder) { @@ -83,7 +90,7 @@ public MultiTermQueryBuilder innerQuery() { @Override protected void doXContent(XContentBuilder builder, Params params) - throws IOException { + throws IOException { builder.startObject(NAME); builder.field(MATCH_FIELD.getPreferredName()); multiTermQueryBuilder.toXContent(builder, params); @@ -105,7 +112,7 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro QueryBuilder query = parseInnerQueryBuilder(parser); if (query instanceof MultiTermQueryBuilder == false) { throw new ParsingException(parser.getTokenLocation(), - "[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query"); + "[span_multi] [" + MATCH_FIELD.getPreferredName() + "] must be of type multi term query"); } subQuery = (MultiTermQueryBuilder) query; } else { @@ -124,12 +131,55 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro if (subQuery == null) { throw new ParsingException(parser.getTokenLocation(), - "[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause"); + "[span_multi] must have [" + MATCH_FIELD.getPreferredName() + "] multi term query clause"); } return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost); } + public static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod { + + private MultiTermQuery multiTermQuery; + private final long maxExpansions; + + TopTermSpanBooleanQueryRewriteWithMaxClause(long max) { + maxExpansions = max; + } + + @Override + public SpanQuery 
rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + multiTermQuery = query; + return (SpanQuery) this.delegate.rewrite(reader, multiTermQuery); + } + + final ScoringRewrite> delegate = new ScoringRewrite>() { + + @Override + protected List getTopLevelBuilder() { + return new ArrayList(); + } + + @Override + protected Query build(List builder) { + return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()])); + } + + @Override + protected void checkMaxClauseCount(int count) { + if (count > maxExpansions) { + throw new ElasticsearchException("[" + multiTermQuery.toString() + " ] " + + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]"); + } + } + + @Override + protected void addClause(List topLevel, Term term, int docCount, float boost, TermContext states) { + SpanTermQuery q = new SpanTermQuery(term, states); + topLevel.add(q); + } + }; + } + @Override protected Query doToQuery(QueryShardContext context) throws IOException { Query subQuery = multiTermQueryBuilder.toQuery(context); @@ -190,10 +240,15 @@ protected Query doToQuery(QueryShardContext context) throws IOException { + MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName()); } spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery); + if (((MultiTermQuery) subQuery).getRewriteMethod() instanceof TopTermsRewrite == false) { + ((SpanMultiTermQueryWrapper) spanQuery).setRewriteMethod(new + TopTermSpanBooleanQueryRewriteWithMaxClause(BooleanQuery.getMaxClauseCount())); + } } if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { return new SpanBoostQuery(spanQuery, boost); } + return spanQuery; } diff --git a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java index b778168235977..fae66dd68bc0c 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java @@ -34,7 +34,6 @@ import org.elasticsearch.common.Strings; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.internal.SearchContext; @@ -238,4 +237,38 @@ public void testFromJson() throws IOException { assertEquals(json, "ki", ((PrefixQueryBuilder) parsed.innerQuery()).value()); assertEquals(json, 1.08, parsed.innerQuery().boost(), 0.0001); } + + public void testDefaultMaxRewriteBuilder() throws Exception { + Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")). + toQuery(createShardContext()); + + if (query instanceof SpanBoostQuery) { + query = ((SpanBoostQuery)query).getQuery(); + } + + assertTrue(query instanceof SpanMultiTermQueryWrapper); + if (query instanceof SpanMultiTermQueryWrapper) { + MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod(); + assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause); + } + + } + + public void testTopNMultiTermsRewriteInsideSpan() throws Exception { + + Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b").rewrite + ("top_terms_boost_2000")). 
+ toQuery(createShardContext()); + + if (query instanceof SpanBoostQuery) { + query = ((SpanBoostQuery)query).getQuery(); + } + + assertTrue(query instanceof SpanMultiTermQueryWrapper); + if (query instanceof SpanMultiTermQueryWrapper) { + MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod(); + assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause); + } + + } } diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 7e1231f90592e..5dd669119e56d 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -19,7 +19,9 @@ package org.elasticsearch.search.query; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.util.English; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; @@ -33,8 +35,12 @@ import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.Operator; +import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.query.SpanMultiTermQueryBuilder; +import org.elasticsearch.index.query.SpanNearQueryBuilder; +import org.elasticsearch.index.query.SpanTermQueryBuilder; import org.elasticsearch.index.query.TermQueryBuilder; import org.elasticsearch.index.query.WrapperQueryBuilder; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; @@ -52,6 +58,7 @@ import org.joda.time.format.ISODateTimeFormat; 
import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Random; @@ -1819,4 +1826,30 @@ public void testRangeQueryRangeFields_24744() throws Exception { assertHitCount(searchResponse, 1); } + public void testTermExpansionExceptionOnSpanFailure() throws ExecutionException, InterruptedException { + Settings.Builder builder = Settings.builder(); + builder.put(SETTING_NUMBER_OF_SHARDS, 1).build(); + + createIndex("test", builder.build()); + ArrayList reqs = new ArrayList<>(); + int origBoolMaxClauseCount = BooleanQuery.getMaxClauseCount(); + try { + BooleanQuery.setMaxClauseCount(2); + for (int i = 0; i < BooleanQuery.getMaxClauseCount() + 1; i++) { + reqs.add(client().prepareIndex("test", "_doc", Integer.toString(i)).setSource("body", "foo" + + Integer.toString(i) + " bar baz")); + } + indexRandom(true, false, reqs); + + QueryBuilder queryBuilder = new SpanNearQueryBuilder(new SpanMultiTermQueryBuilder(QueryBuilders.wildcardQuery + ("body", "f*")), 0).addClause(new SpanTermQueryBuilder("body", "bar")); + + expectThrows(ElasticsearchException.class, () -> + client().prepareSearch().setIndices("test").setQuery(queryBuilder).get()); + } finally { + BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount); + } + + } + }