From f03828d5a72600b30d647e2f503ebb060a8d2c4e Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Mon, 14 Jan 2019 21:23:43 +0100 Subject: [PATCH 1/5] Allow field types to optimize phrase prefix queries This change adds a way to customize how phrase prefix queries should be created on field types. The match phrase prefix query is exposed in field types in order to allow optimizations based on the options set on the field. For instance the text field uses the configured prefix field (if available) to build a span near that mixes the original field and the prefix field on the last position. This change also contains a small refactoring of the match/multi_match query that simplifies the interactions between the builders. Closes #31921 --- .../AnnotatedTextFieldMapper.java | 64 +-- .../lucene/search/MultiPhrasePrefixQuery.java | 11 +- .../index/mapper/MappedFieldType.java | 10 +- .../index/mapper/TextFieldMapper.java | 174 ++++++-- .../index/search/MatchQuery.java | 403 +++++++++--------- .../index/search/MultiMatchQuery.java | 334 ++++++--------- .../CustomUnifiedHighlighterTests.java | 4 +- .../search/MultiPhrasePrefixQueryTests.java | 10 +- .../index/mapper/TextFieldMapperTests.java | 104 +++++ .../MatchPhrasePrefixQueryBuilderTests.java | 16 +- .../index/query/MatchQueryBuilderTests.java | 8 +- .../query/MultiMatchQueryBuilderTests.java | 12 +- .../query/QueryStringQueryBuilderTests.java | 7 +- .../index/search/MultiMatchQueryTests.java | 22 +- 14 files changed, 615 insertions(+), 564 deletions(-) diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index 79fefbc64d407..c49044e4b6b87 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ 
b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -27,15 +27,12 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.analysis.tokenattributes.TypeAttribute; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; -import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.NormsFieldExistsQuery; -import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.elasticsearch.ElasticsearchParseException; @@ -603,62 +600,19 @@ public Query existsQuery(QueryShardContext context) { } @Override - public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - builder.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; - - stream.reset(); - while (stream.incrementToken()) { - if (enablePosIncrements) { - position += posIncrAtt.getPositionIncrement(); - } - else { - position += 1; - } - builder.add(new Term(field, termAtt.getBytesRef()), position); - } - - return builder.build(); + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); } @Override - public Query multiPhraseQuery(String field, TokenStream 
stream, int slop, boolean enablePositionIncrements) throws IOException { - - MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); - mpqb.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; - - List multiTerms = new ArrayList<>(); - stream.reset(); - while (stream.incrementToken()) { - int positionIncrement = posIncrAtt.getPositionIncrement(); - - if (positionIncrement > 0 && multiTerms.size() > 0) { - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - multiTerms.clear(); - } - position += positionIncrement; - multiTerms.add(new Term(field, termAtt.getBytesRef())); - } + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); + } - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - return mpqb.build(); + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, + boolean enablePositionIncrements) throws IOException { + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, enablePositionIncrements); } } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index b8e1039b2df1d..06b7774764050 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -42,13 +42,17 @@ public class MultiPhrasePrefixQuery extends 
Query { - private String field; + private final String field; private ArrayList termArrays = new ArrayList<>(); private ArrayList positions = new ArrayList<>(); private int maxExpansions = Integer.MAX_VALUE; private int slop = 0; + public MultiPhrasePrefixQuery(String field) { + this.field = field; + } + /** * Sets the phrase slop for this query. * @@ -102,9 +106,6 @@ public void add(Term[] terms) { * @see org.apache.lucene.search.PhraseQuery.Builder#add(Term, int) */ public void add(Term[] terms, int position) { - if (termArrays.size() == 0) - field = terms[0].field(); - for (int i = 0; i < terms.length; i++) { if (terms[i].field() != field) { throw new IllegalArgumentException( @@ -212,7 +213,7 @@ private void getPrefixTerms(ObjectHashSet terms, final Term prefix, final @Override public final String toString(String f) { StringBuilder buffer = new StringBuilder(); - if (field == null || !field.equals(f)) { + if (field.equals(f) == false) { buffer.append(field); buffer.append(":"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 741b2300a4678..f806fc40125ad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -365,16 +365,22 @@ public Query regexpQuery(String value, int flags, int maxDeterminizedStates, @Nu public abstract Query existsQuery(QueryShardContext context); - public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean 
enablePositionIncrements) throws IOException { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } + public Query phrasePrefixQuery(TokenStream stream, int slop, + int maxExpansions, boolean enablePositionIncrements) throws IOException { + throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + /** * Create an {@link IntervalsSource} to be used for proximity queries */ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 1b25c7b9866f7..1c638203a23c8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -21,7 +21,6 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; -import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; @@ -41,13 +40,20 @@ import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import 
org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.Version; import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.support.XContentMapValues; @@ -60,6 +66,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -617,9 +624,9 @@ public IntervalsSource intervals(String text, int maxGaps, boolean ordered, Name } @Override - public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { - - if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) { + public Query phraseQuery(TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { + String field = name(); + if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } @@ -645,54 +652,85 @@ public Query phraseQuery(String field, TokenStream stream, int slop, boolean ena } @Override - public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - - if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) { + public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + String field = name(); + if (indexPhrases && slop == 0 && hasGaps(stream) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } + return createPhraseQuery(stream, field, slop, enablePositionIncrements); + } - MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); - 
mpqb.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + @Override + public Query phrasePrefixQuery(TokenStream stream, int slop, + int maxExpansions, boolean enablePositionIncrements) throws IOException { + return analyzePhrasePrefix(stream, slop, maxExpansions, enablePositionIncrements); + } - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; + private Query analyzePhrasePrefix(TokenStream stream, int slop, + int maxExpansions, boolean enablePositionIncrements) throws IOException { + MultiPhrasePrefixQuery query = createPhrasePrefixQuery(stream, name(), slop, maxExpansions, enablePositionIncrements); - List multiTerms = new ArrayList<>(); - stream.reset(); - while (stream.incrementToken()) { - int positionIncrement = posIncrAtt.getPositionIncrement(); + if (prefixFieldType == null + || prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + return query; + } - if (positionIncrement > 0 && multiTerms.size() > 0) { - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - multiTerms.clear(); + int lastPos = query.getTerms().length - 1; + final Term[][] terms = query.getTerms(); + final int[] positions = query.getPositions(); + for (Term term : terms[lastPos]) { + String value = term.text(); + if (value.length() < prefixFieldType.minChars || value.length() > prefixFieldType.maxChars) { + return query; } - position += positionIncrement; - multiTerms.add(new Term(field, termAtt.getBytesRef())); } - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); + if (terms.length == 1) { + Term[] newTerms = Arrays.stream(terms[0]) + .map(term -> new Term(prefixFieldType.name(), term.bytes())) + .toArray(Term[]::new); + return new 
SynonymQuery(newTerms); } - return mpqb.build(); - } - private static CachingTokenFilter cache(TokenStream in) { - if (in instanceof CachingTokenFilter) { - return (CachingTokenFilter) in; + SpanNearQuery.Builder spanQuery = new SpanNearQuery.Builder(name(), true); + spanQuery.setSlop(slop); + int previousPos = -1; + for (int i = 0; i < terms.length; i++) { + Term[] posTerms = terms[i]; + int posInc = positions[i] - previousPos; + previousPos = positions[i]; + if (posInc > 1) { + spanQuery.addGap(posInc - 1); + } + if (i == lastPos) { + if (posTerms.length == 1) { + FieldMaskingSpanQuery fieldMask = + new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), posTerms[0].bytes())), name()); + spanQuery.addClause(fieldMask); + } else { + SpanQuery[] queries = Arrays.stream(posTerms) + .map(term -> new FieldMaskingSpanQuery( + new SpanTermQuery(new Term(prefixFieldType.name(), term.bytes())), name()) + ) + .toArray(SpanQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } else { + if (posTerms.length == 1) { + spanQuery.addClause(new SpanTermQuery(posTerms[0])); + } else { + SpanTermQuery[] queries = Arrays.stream(posTerms) + .map(SpanTermQuery::new) + .toArray(SpanTermQuery[]::new); + spanQuery.addClause(new SpanOrQuery(queries)); + } + } } - return new CachingTokenFilter(in); + return spanQuery.build(); } - private static boolean hasGaps(CachingTokenFilter stream) throws IOException { + private static boolean hasGaps(TokenStream stream) throws IOException { PositionIncrementAttribute posIncAtt = stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); while (stream.incrementToken()) { @@ -870,4 +908,66 @@ protected void doXContentBody(XContentBuilder builder, boolean includeDefaults, builder.field("index_phrases", fieldType().indexPhrases); } } + + public static Query createPhraseQuery(TokenStream stream, String field, int slop, boolean enablePositionIncrements) throws IOException { + MultiPhraseQuery.Builder mpqb = 
new MultiPhraseQuery.Builder(); + mpqb.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + List multiTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + int positionIncrement = posIncrAtt.getPositionIncrement(); + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, termAtt.getBytesRef())); + } + + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + return mpqb.build(); + } + + public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, + boolean enablePositionIncrements) throws IOException { + MultiPhrasePrefixQuery builder = new MultiPhrasePrefixQuery(field); + builder.setSlop(slop); + builder.setMaxExpansions(maxExpansions); + + List currentTerms = new ArrayList<>(); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + + stream.reset(); + int position = -1; + while (stream.incrementToken()) { + int posInc = enablePositionIncrements ? 
posIncrAtt.getPositionIncrement() : 1; + if (posInc != 0) { + if (currentTerms.isEmpty() == false) { + builder.add(currentTerms.toArray(new Term[0]), position); + } + position += posInc; + currentTerms.clear(); + } + currentTerms.add(new Term(field, termAtt.getBytesRef())); + } + builder.add(currentTerms.toArray(new Term[0]), position); + return builder; + } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 267f3a6951161..c9683fd94af75 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -20,36 +20,30 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; -import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import 
org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.Lucene; -import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.Queries; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.MappedFieldType; @@ -57,6 +51,8 @@ import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.Iterator; +import java.util.function.Supplier; import static org.elasticsearch.common.lucene.search.Queries.newLenientFieldQuery; import static org.elasticsearch.common.lucene.search.Queries.newUnmappedFieldQuery; @@ -234,78 +230,83 @@ public void setAutoGenerateSynonymsPhraseQuery(boolean enabled) { this.autoGenerateSynonymsPhraseQuery = enabled; } - protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) { - if (analyzer == null) { - return quoted ? 
context.getSearchQuoteAnalyzer(fieldType) : context.getSearchAnalyzer(fieldType); - } else { - return analyzer; - } - } - - private boolean hasPositions(MappedFieldType fieldType) { - return fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; - } - public Query parse(Type type, String fieldName, Object value) throws IOException { - MappedFieldType fieldType = context.fieldMapper(fieldName); + final MappedFieldType fieldType = context.fieldMapper(fieldName); if (fieldType == null) { return newUnmappedFieldQuery(fieldName); } - final String field = fieldType.name(); - - Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE); + Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE || type == Type.PHRASE_PREFIX); assert analyzer != null; + MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType); + /* * If a keyword analyzer is used, we know that further analysis isn't * needed and can immediately return a term query. */ - if (analyzer == Lucene.KEYWORD_ANALYZER) { - return blendTermQuery(new Term(fieldName, value.toString()), fieldType); + if (analyzer == Lucene.KEYWORD_ANALYZER + && type != Type.PHRASE_PREFIX) { + return builder.newTermQuery(new Term(fieldName, value.toString())); } - MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType); - builder.setEnablePositionIncrements(this.enablePositionIncrements); - if (hasPositions(fieldType)) { - builder.setAutoGenerateMultiTermSynonymsPhraseQuery(this.autoGenerateSynonymsPhraseQuery); - } else { - builder.setAutoGenerateMultiTermSynonymsPhraseQuery(false); - } + return parseInternal(type, fieldName, builder, value); + } - Query query = null; + protected final Query parseInternal(Type type, String fieldName, MatchQueryBuilder builder, Object value) throws IOException { + final Query query; switch (type) { case BOOLEAN: if (commonTermsCutoff == null) { - query = builder.createBooleanQuery(field, value.toString(), occur); + query = 
builder.createBooleanQuery(fieldName, value.toString(), occur); } else { - query = builder.createCommonTermsQuery(field, value.toString(), occur, occur, commonTermsCutoff); + query = createCommonTermsQuery(builder, fieldName, value.toString(), occur, occur, commonTermsCutoff); } break; + case PHRASE: - query = builder.createPhraseQuery(field, value.toString(), phraseSlop); + query = builder.createPhraseQuery(fieldName, value.toString(), phraseSlop); break; + case PHRASE_PREFIX: - query = builder.createPhrasePrefixQuery(field, value.toString(), phraseSlop, maxExpansions); + query = builder.createPhrasePrefixQuery(fieldName, value.toString(), phraseSlop); break; + default: throw new IllegalStateException("No type found for [" + type + "]"); } - if (query == null) { - return zeroTermsQuery(); - } else { - return query; + return query == null ? zeroTermsQuery() : query; + } + + private Query createCommonTermsQuery(MatchQueryBuilder builder, String field, String queryText, + Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) { + Query booleanQuery = builder.createBooleanQuery(field, queryText, lowFreqOccur); + if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { + BooleanQuery bq = (BooleanQuery) booleanQuery; + return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency); } + return booleanQuery; } - protected final Query termQuery(MappedFieldType fieldType, BytesRef value, boolean lenient) { - try { - return fieldType.termQuery(value, context); - } catch (RuntimeException e) { - if (lenient) { - return newLenientFieldQuery(fieldType.name(), e); + private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, + Occur highFreqOccur, + Occur lowFreqOccur, + float maxTermFrequency) { + ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency); + for (BooleanClause clause : bq.clauses()) { + if ((clause.getQuery() instanceof TermQuery) == false) { + return bq; } - 
throw e; + query.add(((TermQuery) clause.getQuery()).getTerm()); + } + return query; + } + + protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) { + if (analyzer == null) { + return quoted ? context.getSearchQuoteAnalyzer(fieldType) : context.getSearchAnalyzer(fieldType); + } else { + return analyzer; } } @@ -322,63 +323,25 @@ protected Query zeroTermsQuery() { } } - private class MatchQueryBuilder extends QueryBuilder { + private boolean hasPositions(MappedFieldType fieldType) { + return fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + } - private final MappedFieldType mapper; + class MatchQueryBuilder extends QueryBuilder { + private final MappedFieldType fieldType; /** * Creates a new QueryBuilder using the given analyzer. */ - MatchQueryBuilder(Analyzer analyzer, MappedFieldType mapper) { + MatchQueryBuilder(Analyzer analyzer, MappedFieldType fieldType) { super(analyzer); - this.mapper = mapper; - } - - @Override - protected Query newTermQuery(Term term) { - return blendTermQuery(term, mapper); - } - - @Override - protected Query newSynonymQuery(Term[] terms) { - return blendTermsQuery(terms, mapper); - } - - @Override - protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - try { - checkForPositions(field); - Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements); - if (query instanceof PhraseQuery) { - // synonyms that expand to multiple terms can return a phrase query. 
- return blendPhraseQuery((PhraseQuery) query, mapper); - } - return query; - } catch (IllegalArgumentException | IllegalStateException e) { - if (lenient) { - return newLenientFieldQuery(field, e); - } - throw e; - } - } - - @Override - protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { - try { - checkForPositions(field); - return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements); - } catch (IllegalArgumentException | IllegalStateException e) { - if (lenient) { - return newLenientFieldQuery(field, e); - } - throw e; - } - } - - private void checkForPositions(String field) { - if (hasPositions(mapper) == false) { - throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + this.fieldType = fieldType; + if (hasPositions(fieldType)) { + setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); + } else { + setAutoGenerateMultiTermSynonymsPhraseQuery(false); } + setEnablePositionIncrements(enablePositionIncrements); } /** @@ -387,151 +350,171 @@ private void checkForPositions(String field) { */ @Override protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, - String queryText, boolean quoted, int phraseSlop) { + String queryText, boolean quoted, int slop) { assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + return createQuery(field, queryText, source -> createFieldQuery(source, operator, field, quoted, slop)); + } + + public Query createPhrasePrefixQuery(String field, String queryText, int slop) { + return createQuery(field, queryText, source -> createPhrasePrefixQuery(source, field, slop)); + } + private Query createQuery(String field, String queryText, CheckedFunction queryFunc) { // Use the analyzer to get all the tokens, and then build an appropriate // query based on the analysis chain. 
try (TokenStream source = analyzer.tokenStream(field, queryText)) { if (source.hasAttribute(DisableGraphAttribute.class)) { /* - A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid - paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. + * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid + * paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. */ setEnableGraphQueries(false); } - Query query = super.createFieldQuery(source, operator, field, quoted, phraseSlop); - setEnableGraphQueries(true); - return query; + try { + return queryFunc.apply(source); + } finally { + setEnableGraphQueries(true); + } } catch (IOException e) { throw new RuntimeException("Error analyzing query text", e); } } - public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) { - final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop); - return toMultiPhrasePrefix(query, phraseSlop, maxExpansions); - } + private Query createPhrasePrefixQuery(TokenStream source, String field, int slop) { + // Build an appropriate phrase prefix query based on the analysis chain. 
+ try (CachingTokenFilter stream = new CachingTokenFilter(source)) { - private Query toMultiPhrasePrefix(final Query query, int phraseSlop, int maxExpansions) { - float boost = 1; - Query innerQuery = query; - while (innerQuery instanceof BoostQuery) { - BoostQuery bq = (BoostQuery) innerQuery; - boost *= bq.getBoost(); - innerQuery = bq.getQuery(); - } - if (query instanceof SpanQuery) { - return toSpanQueryPrefix((SpanQuery) query, boost); - } - final MultiPhrasePrefixQuery prefixQuery = new MultiPhrasePrefixQuery(); - prefixQuery.setMaxExpansions(maxExpansions); - prefixQuery.setSlop(phraseSlop); - if (innerQuery instanceof PhraseQuery) { - PhraseQuery pq = (PhraseQuery) innerQuery; - Term[] terms = pq.getTerms(); - int[] positions = pq.getPositions(); - for (int i = 0; i < terms.length; i++) { - prefixQuery.add(new Term[]{terms[i]}, positions[i]); + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); + + if (termAtt == null) { + return null; } - return boost == 1 ? prefixQuery : new BoostQuery(prefixQuery, boost); - } else if (innerQuery instanceof MultiPhraseQuery) { - MultiPhraseQuery pq = (MultiPhraseQuery) innerQuery; - Term[][] terms = pq.getTermArrays(); - int[] positions = pq.getPositions(); - for (int i = 0; i < terms.length; i++) { - prefixQuery.add(terms[i], positions[i]); + + int numTokens = 0; + int positionCount = 0; + boolean isGraph = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement != 0) { + positionCount += positionIncrement; + } + + int positionLength = posLenAtt.getPositionLength(); + if (enableGraphQueries && positionLength > 1) { + isGraph = true; + } } - return boost == 1 ? 
prefixQuery : new BoostQuery(prefixQuery, boost); - } else if (innerQuery instanceof TermQuery) { - prefixQuery.add(((TermQuery) innerQuery).getTerm()); - return boost == 1 ? prefixQuery : new BoostQuery(prefixQuery, boost); + + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. + + if (numTokens == 0) { + return null; + } else if (isGraph) { + // graph + return analyzeGraphPhrasePrefix(stream, field, slop); + } else { + // single position + return analyzePhrasePrefix(field, stream, slop, positionCount); + } + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); } - return query; } - private Query toSpanQueryPrefix(SpanQuery query, float boost) { - if (query instanceof SpanTermQuery) { - SpanMultiTermQueryWrapper ret = - new SpanMultiTermQueryWrapper<>(new PrefixQuery(((SpanTermQuery) query).getTerm())); - return boost == 1 ? ret : new BoostQuery(ret, boost); - } else if (query instanceof SpanNearQuery) { - SpanNearQuery spanNearQuery = (SpanNearQuery) query; - SpanQuery[] clauses = spanNearQuery.getClauses(); - if (clauses[clauses.length - 1] instanceof SpanTermQuery) { - clauses[clauses.length - 1] = new SpanMultiTermQueryWrapper<>( - new PrefixQuery(((SpanTermQuery) clauses[clauses.length - 1]).getTerm()) - ); - } - SpanNearQuery newQuery = new SpanNearQuery(clauses, spanNearQuery.getSlop(), spanNearQuery.isInOrder()); - return boost == 1 ? newQuery : new BoostQuery(newQuery, boost); - } else if (query instanceof SpanOrQuery) { - SpanOrQuery orQuery = (SpanOrQuery) query; - SpanQuery[] clauses = new SpanQuery[orQuery.getClauses().length]; - for (int i = 0; i < clauses.length; i++) { - clauses[i] = (SpanQuery) toSpanQueryPrefix(orQuery.getClauses()[i], 1); - } - return boost == 1 ? 
new SpanOrQuery(clauses) : new BoostQuery(new SpanOrQuery(clauses), boost); + @Override + protected Query newTermQuery(Term term) { + Supplier querySupplier; + if (fuzziness != null) { + querySupplier = () -> { + Query query = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions); + if (query instanceof FuzzyQuery) { + QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); + } + return query; + }; } else { + querySupplier = () -> fieldType.termQuery(term.bytes(), context); + } + try { + Query query = querySupplier.get(); return query; + } catch (RuntimeException e) { + if (lenient) { + return newLenientFieldQuery(fieldType.name(), e); + } else { + throw e; + } } } - public Query createCommonTermsQuery(String field, String queryText, - Occur highFreqOccur, - Occur lowFreqOccur, - float maxTermFrequency) { - Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur); - if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { - BooleanQuery bq = (BooleanQuery) booleanQuery; - return boolToExtendedCommonTermsQuery(bq, highFreqOccur, lowFreqOccur, maxTermFrequency); + @Override + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + try { + checkForPositions(field); + return fieldType.phraseQuery(stream, slop, enablePositionIncrements); + } catch (IllegalArgumentException | IllegalStateException e) { + if (lenient) { + return newLenientFieldQuery(field, e); + } + throw e; } - return booleanQuery; } - private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, - Occur highFreqOccur, - Occur lowFreqOccur, - float maxTermFrequency) { - ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency); - for (BooleanClause clause : bq.clauses()) { - if (!(clause.getQuery() instanceof TermQuery)) { - return bq; + @Override + protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws 
IOException { + try { + checkForPositions(field); + return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + } catch (IllegalArgumentException | IllegalStateException e) { + if (lenient) { + return newLenientFieldQuery(field, e); } - query.add(((TermQuery) clause.getQuery()).getTerm()); + throw e; } - return query; } - } - - /** - * Called when a phrase query is built with {@link QueryBuilder#analyzePhrase(String, TokenStream, int)}. - * Subclass can override this function to blend this query to multiple fields. - */ - protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) { - return query; - } - - protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { - return new SynonymQuery(terms); - } - protected Query blendTermQuery(Term term, MappedFieldType fieldType) { - if (fuzziness != null) { + private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, int positionCount) throws IOException { try { - Query query = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions); - if (query instanceof FuzzyQuery) { - QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); + if (positionCount > 1) { + checkForPositions(field); } - return query; - } catch (RuntimeException e) { + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, enablePositionIncrements); + } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { - return newLenientFieldQuery(fieldType.name(), e); - } else { - throw e; + return newLenientFieldQuery(field, e); + } + throw e; + } + } + + private Query analyzeGraphPhrasePrefix(TokenStream source, String field, int slop) throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + /* + * Creates a boolean query from the graph token stream by extracting all the finite strings from the graph + * and using them to create phrase prefix 
queries with the appropriate slop. + */ + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + Iterator it = graph.getFiniteStrings(); + while (it.hasNext()) { + Query query = createPhrasePrefixQuery(it.next(), field, slop); + if (query != null) { + builder.add(query, BooleanClause.Occur.SHOULD); } } + return builder.build(); + } + + private void checkForPositions(String field) { + if (hasPositions(fieldType) == false) { + throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + } } - return termQuery(fieldType, term.bytes(), lenient); } } diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 6f57faba001c9..ed883609426b4 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -20,12 +20,12 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.Term; import org.apache.lucene.queries.BlendedTermQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; @@ -49,222 +49,174 @@ public class MultiMatchQuery extends MatchQuery { private Float groupTieBreaker = null; - public void setTieBreaker(float tieBreaker) { - this.groupTieBreaker = tieBreaker; - } - public MultiMatchQuery(QueryShardContext context) { super(context); } - private Query parseAndApply(Type type, String fieldName, Object value, - String minimumShouldMatch, Float boostValue) throws IOException { - Query query = parse(type, fieldName, value); - query = 
Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); - if (query != null && boostValue != null && - boostValue != AbstractQueryBuilder.DEFAULT_BOOST && query instanceof MatchNoDocsQuery == false) { - query = new BoostQuery(query, boostValue); - } - return query; + public void setTieBreaker(float tieBreaker) { + this.groupTieBreaker = tieBreaker; } public Query parse(MultiMatchQueryBuilder.Type type, Map fieldNames, - Object value, String minimumShouldMatch) throws IOException { - final Query result; - // reset query builder - queryBuilder = null; - if (fieldNames.size() == 1) { - Map.Entry fieldBoost = fieldNames.entrySet().iterator().next(); - Float boostValue = fieldBoost.getValue(); - result = parseAndApply(type.matchQueryType(), fieldBoost.getKey(), value, minimumShouldMatch, boostValue); - } else { - final float tieBreaker = groupTieBreaker == null ? type.tieBreaker() : groupTieBreaker; - switch (type) { - case PHRASE: - case PHRASE_PREFIX: - case BEST_FIELDS: - case MOST_FIELDS: - queryBuilder = new QueryBuilder(tieBreaker); - break; - case CROSS_FIELDS: - queryBuilder = new CrossFieldsQueryBuilder(tieBreaker); - break; - default: - throw new IllegalStateException("No such type: " + type); - } - final List queries = queryBuilder.buildGroupedQueries(type, fieldNames, value, minimumShouldMatch); - result = queryBuilder.combineGrouped(queries); + Object value, String minimumShouldMatch) throws IOException { + final float tieBreaker = groupTieBreaker == null ? 
type.tieBreaker() : groupTieBreaker; + final List queries; + switch (type) { + case PHRASE: + case PHRASE_PREFIX: + case BEST_FIELDS: + case MOST_FIELDS: + queries = buildFieldQueries(type, fieldNames, value, minimumShouldMatch); + break; + + case CROSS_FIELDS: + queries = buildCrossFieldQuery(type, fieldNames, value, minimumShouldMatch, tieBreaker); + break; + + default: + throw new IllegalStateException("No such type: " + type); } - return result; + return combineGrouped(queries, tieBreaker); } - private QueryBuilder queryBuilder; - - public class QueryBuilder { - protected final float tieBreaker; - - public QueryBuilder(float tieBreaker) { - this.tieBreaker = tieBreaker; + private Query combineGrouped(List groupQuery, float tieBreaker) { + if (groupQuery.isEmpty()) { + return zeroTermsQuery(); } - - public List buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map fieldNames, - Object value, String minimumShouldMatch) throws IOException{ - List queries = new ArrayList<>(); - for (String fieldName : fieldNames.keySet()) { - Float boostValue = fieldNames.get(fieldName); - Query query = parseGroup(type.matchQueryType(), fieldName, boostValue, value, minimumShouldMatch); - if (query != null) { - queries.add(query); - } - } - return queries; - } - - Query parseGroup(Type type, String field, Float boostValue, Object value, String minimumShouldMatch) throws IOException { - if (context.fieldMapper(field) == null) { - return null; // indicates to the caller that this field is unmapped and should be disregarded - } - return parseAndApply(type, field, value, minimumShouldMatch, boostValue); + if (groupQuery.size() == 1) { + return groupQuery.get(0); } + return new DisjunctionMaxQuery(groupQuery, tieBreaker); + } - private Query combineGrouped(List groupQuery) { - if (groupQuery == null || groupQuery.isEmpty()) { - return zeroTermsQuery(); + private List buildFieldQueries(MultiMatchQueryBuilder.Type type, Map fieldNames, + Object value, String minimumShouldMatch) throws 
IOException{ + List queries = new ArrayList<>(); + for (String fieldName : fieldNames.keySet()) { + if (context.fieldMapper(fieldName) == null) { + // ignore unmapped fields + continue; } - if (groupQuery.size() == 1) { - return groupQuery.get(0); + Float boostValue = fieldNames.get(fieldName); + Query query = parse(type.matchQueryType(), fieldName, value); + query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + if (query != null + && boostValue != null + && boostValue != AbstractQueryBuilder.DEFAULT_BOOST + && query instanceof MatchNoDocsQuery == false) { + query = new BoostQuery(query, boostValue); } - List queries = new ArrayList<>(); - for (Query query : groupQuery) { + if (query != null) { queries.add(query); } - return new DisjunctionMaxQuery(queries, tieBreaker); - } - - public Query blendTerm(Term term, MappedFieldType fieldType) { - return MultiMatchQuery.super.blendTermQuery(term, fieldType); } + return queries; + } - public Query blendTerms(Term[] terms, MappedFieldType fieldType) { - return MultiMatchQuery.super.blendTermsQuery(terms, fieldType); + private List buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map fieldNames, + Object value, String minimumShouldMatch, float tieBreaker) throws IOException { + Map> groups = new HashMap<>(); + List queries = new ArrayList<>(); + for (Map.Entry entry : fieldNames.entrySet()) { + String name = entry.getKey(); + MappedFieldType fieldType = context.fieldMapper(name); + if (fieldType != null) { + Analyzer actualAnalyzer = getAnalyzer(fieldType, type == MultiMatchQueryBuilder.Type.PHRASE); + if (!groups.containsKey(actualAnalyzer)) { + groups.put(actualAnalyzer, new ArrayList<>()); + } + float boost = entry.getValue() == null ? 
1.0f : entry.getValue(); + groups.get(actualAnalyzer).add(new FieldAndBoost(fieldType, boost)); + } } - - public Query termQuery(MappedFieldType fieldType, BytesRef value) { - return MultiMatchQuery.this.termQuery(fieldType, value, lenient); + for (Map.Entry> group : groups.entrySet()) { + final MatchQueryBuilder builder; + if (group.getValue().size() == 1) { + builder = new MatchQueryBuilder(group.getKey(), group.getValue().get(0).fieldType); + } else { + builder = new BlendedQueryBuilder(group.getKey(), group.getValue(), tieBreaker); + } + /* + * We have to pick some field to pass through the superclass so + * we just pick the first field. It shouldn't matter because + * fields are already grouped by their analyzers/types. + */ + String representativeField = group.getValue().get(0).fieldType.name(); + Query query = parseInternal(type.matchQueryType(), representativeField, builder, value); + query = Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); + if (query != null) { + queries.add(query); + } } - public Query blendPhrase(PhraseQuery query, MappedFieldType type) { - return MultiMatchQuery.super.blendPhraseQuery(query, type); - } + return queries; } - final class CrossFieldsQueryBuilder extends QueryBuilder { - private FieldAndFieldType[] blendedFields; - - CrossFieldsQueryBuilder(float tiebreaker) { - super(tiebreaker); - } - - @Override - public List buildGroupedQueries(MultiMatchQueryBuilder.Type type, Map fieldNames, - Object value, String minimumShouldMatch) throws IOException { - Map> groups = new HashMap<>(); - List queries = new ArrayList<>(); - for (Map.Entry entry : fieldNames.entrySet()) { - String name = entry.getKey(); - MappedFieldType fieldType = context.fieldMapper(name); - if (fieldType != null) { - Analyzer actualAnalyzer = getAnalyzer(fieldType, type == MultiMatchQueryBuilder.Type.PHRASE); - name = fieldType.name(); - if (!groups.containsKey(actualAnalyzer)) { - groups.put(actualAnalyzer, new ArrayList<>()); - } - Float boost = 
entry.getValue(); - boost = boost == null ? Float.valueOf(1.0f) : boost; - groups.get(actualAnalyzer).add(new FieldAndFieldType(fieldType, boost)); - } else { - queries.add(new MatchNoDocsQuery("unknown field " + name)); - } - } - for (List group : groups.values()) { - if (group.size() > 1) { - blendedFields = new FieldAndFieldType[group.size()]; - int i = 0; - for (FieldAndFieldType fieldAndFieldType : group) { - blendedFields[i++] = fieldAndFieldType; - } - } else { - blendedFields = null; - } - /* - * We have to pick some field to pass through the superclass so - * we just pick the first field. It shouldn't matter because - * fields are already grouped by their analyzers/types. - */ - String representativeField = group.get(0).fieldType.name(); - Query q = parseGroup(type.matchQueryType(), representativeField, 1f, value, minimumShouldMatch); - if (q != null) { - queries.add(q); - } - } + private class BlendedQueryBuilder extends MatchQueryBuilder { + private final List blendedFields; + private final float tieBreaker; - return queries.isEmpty() ? 
null : queries; + BlendedQueryBuilder(Analyzer analyzer, List blendedFields, float tieBreaker) { + super(analyzer, blendedFields.get(0).fieldType); + this.blendedFields = blendedFields; + this.tieBreaker = tieBreaker; } @Override - public Query blendTerms(Term[] terms, MappedFieldType fieldType) { - if (blendedFields == null || blendedFields.length == 1) { - return super.blendTerms(terms, fieldType); - } + protected Query newSynonymQuery(Term[] terms) { BytesRef[] values = new BytesRef[terms.length]; for (int i = 0; i < terms.length; i++) { values[i] = terms[i].bytes(); } - return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, lenient, blendedFields); + return blendTerms(context, values, commonTermsCutoff, tieBreaker, lenient, blendedFields); } @Override - public Query blendTerm(Term term, MappedFieldType fieldType) { - if (blendedFields == null) { - return super.blendTerm(term, fieldType); - } - return MultiMatchQuery.blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, lenient, blendedFields); + public Query newTermQuery(Term term) { + return blendTerm(context, term.bytes(), commonTermsCutoff, tieBreaker, lenient, blendedFields); } @Override - public Query termQuery(MappedFieldType fieldType, BytesRef value) { - /* - * Use the string value of the term because we're reusing the - * portion of the query is usually after the analyzer has run on - * each term. We just skip that analyzer phase. 
- */ - return blendTerm(new Term(fieldType.name(), value.utf8ToString()), fieldType); + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + List disjunctions = new ArrayList<>(); + for (FieldAndBoost fieldType : blendedFields) { + Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); + if (fieldType.boost != AbstractQueryBuilder.DEFAULT_BOOST) { + query = new BoostQuery(query, fieldType.boost); + } + disjunctions.add(query); + } + return new DisjunctionMaxQuery(disjunctions, tieBreaker); } @Override - public Query blendPhrase(PhraseQuery query, MappedFieldType type) { - if (blendedFields == null) { - return super.blendPhrase(query, type); + protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { + List disjunctions = new ArrayList<>(); + for (FieldAndBoost fieldType : blendedFields) { + Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); + if (fieldType.boost != AbstractQueryBuilder.DEFAULT_BOOST) { + query = new BoostQuery(query, fieldType.boost); + } + disjunctions.add(query); } - /** - * We build phrase queries for multi-word synonyms when {@link QueryBuilder#autoGenerateSynonymsPhraseQuery} is true. - */ - return MultiMatchQuery.blendPhrase(query, tieBreaker, blendedFields); + return new DisjunctionMaxQuery(disjunctions, tieBreaker); } } static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker, - boolean lenient, FieldAndFieldType... blendedFields) { + boolean lenient, List blendedFields) { + return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, lenient, blendedFields); } static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, - boolean lenient, FieldAndFieldType... 
blendedFields) { + boolean lenient, List blendedFields) { + List queries = new ArrayList<>(); - Term[] terms = new Term[blendedFields.length * values.length]; - float[] blendedBoost = new float[blendedFields.length * values.length]; + Term[] terms = new Term[blendedFields.size() * values.length]; + float[] blendedBoost = new float[blendedFields.size() * values.length]; int i = 0; - for (FieldAndFieldType ft : blendedFields) { + for (FieldAndBoost ft : blendedFields) { for (BytesRef term : values) { Query query; try { @@ -309,61 +261,15 @@ static Query blendTerms(QueryShardContext context, BytesRef[] values, Float comm // best effort: add clauses that are not term queries so that they have an opportunity to match // however their score contribution will be different // TODO: can we improve this? - return new DisjunctionMaxQuery(queries, 1.0f); - } - } - - /** - * Expand a {@link PhraseQuery} to multiple fields that share the same analyzer. - * Returns a {@link DisjunctionMaxQuery} with a disjunction for each expanded field. - */ - static Query blendPhrase(PhraseQuery query, float tiebreaker, FieldAndFieldType... 
fields) { - List disjunctions = new ArrayList<>(); - for (FieldAndFieldType field : fields) { - int[] positions = query.getPositions(); - Term[] terms = query.getTerms(); - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - for (int i = 0; i < terms.length; i++) { - builder.add(new Term(field.fieldType.name(), terms[i].bytes()), positions[i]); - } - Query q = builder.build(); - if (field.boost != AbstractQueryBuilder.DEFAULT_BOOST) { - q = new BoostQuery(q, field.boost); - } - disjunctions.add(q); - } - return new DisjunctionMaxQuery(disjunctions, tiebreaker); - } - - @Override - protected Query blendTermQuery(Term term, MappedFieldType fieldType) { - if (queryBuilder == null) { - return super.blendTermQuery(term, fieldType); - } - return queryBuilder.blendTerm(term, fieldType); - } - - @Override - protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { - if (queryBuilder == null) { - return super.blendTermsQuery(terms, fieldType); - } - return queryBuilder.blendTerms(terms, fieldType); - } - - @Override - protected Query blendPhraseQuery(PhraseQuery query, MappedFieldType fieldType) { - if (queryBuilder == null) { - return super.blendPhraseQuery(query, fieldType); + return new DisjunctionMaxQuery(queries, tieBreaker); } - return queryBuilder.blendPhrase(query, fieldType); } - static final class FieldAndFieldType { + static final class FieldAndBoost { final MappedFieldType fieldType; final float boost; - FieldAndFieldType(MappedFieldType fieldType, float boost) { + FieldAndBoost(MappedFieldType fieldType, float boost) { this.fieldType = Objects.requireNonNull(fieldType); this.boost = boost; } diff --git a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index a6e676006fdbf..4e4b04d1ff19c 100644 --- a/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ 
b/server/src/test/java/org/apache/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -126,7 +126,7 @@ public void testMultiPhrasePrefixQuerySingleTerm() throws Exception { final String[] outputs = { "The quick brown fox." }; - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("text"); query.add(new Term("text", "bro")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); @@ -139,7 +139,7 @@ public void testMultiPhrasePrefixQuery() throws Exception { final String[] outputs = { "The quick brown fox." }; - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("text"); query.add(new Term("text", "quick")); query.add(new Term("text", "brown")); query.add(new Term("text", "fo")); diff --git a/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java b/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java index 23b6939fe7a70..f0d4c88e01c19 100644 --- a/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java +++ b/server/src/test/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQueryTests.java @@ -43,24 +43,24 @@ public void testSimple() throws Exception { IndexReader reader = DirectoryReader.open(writer); IndexSearcher searcher = new IndexSearcher(reader); - MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); + MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("field"); query.add(new Term("field", "aa")); assertThat(searcher.count(query), equalTo(1)); - query = new MultiPhrasePrefixQuery(); + query = new MultiPhrasePrefixQuery("field"); query.add(new Term("field", "aaa")); query.add(new Term("field", "bb")); assertThat(searcher.count(query), equalTo(1)); - query = new MultiPhrasePrefixQuery(); + 
query = new MultiPhrasePrefixQuery("field"); query.setSlop(1); query.add(new Term("field", "aaa")); query.add(new Term("field", "cc")); assertThat(searcher.count(query), equalTo(1)); - query = new MultiPhrasePrefixQuery(); + query = new MultiPhrasePrefixQuery("field"); query.setSlop(1); query.add(new Term("field", "xxx")); assertThat(searcher.count(query), equalTo(0)); } -} \ No newline at end of file +} diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index acd6c9ee6f80b..63d30132ac13f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -34,13 +34,19 @@ import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressedXContent; +import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.ToXContent; @@ -52,6 +58,7 @@ import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType; +import 
org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; import org.elasticsearch.index.query.MatchPhraseQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.search.MatchQuery; @@ -956,4 +963,101 @@ public void testIndexPrefixMapping() throws IOException { assertThat(e.getMessage(), containsString("Cannot set index_prefixes on unindexed field [field]")); } } + + public void testFastPhrasePrefixes() throws IOException { + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { + throw new UnsupportedOperationException(); + }, null); + + String mapping = Strings.toString(XContentFactory.jsonBuilder().startObject().startObject("type") + .startObject("properties") + .startObject("field") + .field("type", "text") + .field("analyzer", "my_stop_analyzer") + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + .endObject() + .startObject("synfield") + .field("type", "text") + .field("analyzer", "standard") // will be replaced with MockSynonymAnalyzer + .field("index_phrases", true) + .startObject("index_prefixes") + .field("min_chars", 2) + .field("max_chars", 10) + .endObject() + .endObject() + .endObject() + .endObject().endObject()); + + queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); + + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "two"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "words")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + + Query q2 = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); + expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", 
"three"))) + .addClause(new SpanTermQuery(new Term("field", "words"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "here")), "field") + ) + .build(); + assertThat(q2, equalTo(expected)); + + Query q3 = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + expected = new SpanNearQuery.Builder("field", true) + .setSlop(1) + .addClause(new SpanTermQuery(new Term("field", "two"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "words")), "field") + ) + .build(); + assertThat(q3, equalTo(expected)); + + Query q4 = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q4, is(new SynonymQuery(new Term("field._index_prefix", "singleton")))); + + Query q5 = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "sparkle"))) + .addGap(1) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "stopword")), "field") + ) + .build(); + assertThat(q5, equalTo(expected)); + + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + expected = new SpanNearQuery.Builder("synfield", true) + .addClause(new SpanTermQuery(new Term("synfield", "motor"))) + .addClause( + new SpanOrQuery( + new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("synfield._index_prefix", "dogs")), "synfield" + ), + new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("synfield._index_prefix", "dog")), "synfield" + ) + ) + ) + .build(); + assertThat(q6, equalTo(expected)); + + Query q7 = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "field", "motor d"); + MultiPhrasePrefixQuery mpq = new 
MultiPhrasePrefixQuery("field"); + mpq.add(new Term("field", "motor")); + mpq.add(new Term("field", "d")); + assertThat(q7, equalTo(mpq)); + } } diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java index fd722ef0c77af..a6aa53e3aa0e9 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java @@ -19,12 +19,9 @@ package org.elasticsearch.index.query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; -import org.apache.lucene.search.PointRangeQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.SynonymQuery; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.search.internal.SearchContext; @@ -34,7 +31,6 @@ import java.util.HashMap; import java.util.Map; -import static org.elasticsearch.test.AbstractBuilderTestCase.STRING_ALIAS_FIELD_NAME; import static org.hamcrest.CoreMatchers.either; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.Matchers.containsString; @@ -43,8 +39,7 @@ public class MatchPhrasePrefixQueryBuilderTests extends AbstractQueryTestCase { @Override protected MatchPhrasePrefixQueryBuilder doCreateTestQueryBuilder() { - String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME, BOOLEAN_FIELD_NAME, INT_FIELD_NAME, - DOUBLE_FIELD_NAME, DATE_FIELD_NAME); + String fieldName = randomFrom(STRING_FIELD_NAME, STRING_ALIAS_FIELD_NAME); Object value; if (isTextField(fieldName)) { int terms = randomIntBetween(0, 3); @@ -91,10 +86,9 @@ protected Map getAlternateVersions() { 
protected void doAssertLuceneQuery(MatchPhrasePrefixQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { assertThat(query, notNullValue()); - assertThat(query, - either(instanceOf(BooleanQuery.class)).or(instanceOf(MultiPhrasePrefixQuery.class)) - .or(instanceOf(TermQuery.class)).or(instanceOf(PointRangeQuery.class)) - .or(instanceOf(IndexOrDocValuesQuery.class)).or(instanceOf(MatchNoDocsQuery.class))); + assertThat(query, either(instanceOf(MultiPhrasePrefixQuery.class)) + .or(instanceOf(SynonymQuery.class)) + .or(instanceOf(MatchNoDocsQuery.class))); } public void testIllegalValues() { diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java index 184ee2759c15e..c258cce6c7c50 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchQueryBuilderTests.java @@ -25,7 +25,6 @@ import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -371,13 +370,10 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws public void testMatchPhrasePrefixWithBoost() throws Exception { QueryShardContext context = createShardContext(); { - // field boost is applied on a single term query + // field boost is ignored on a single term query MatchPhrasePrefixQueryBuilder builder = new MatchPhrasePrefixQueryBuilder("string_boost", "foo"); Query query = builder.toQuery(context); - assertThat(query, instanceOf(BoostQuery.class)); - assertThat(((BoostQuery) query).getBoost(), equalTo(4f)); - Query innerQuery = ((BoostQuery) query).getQuery(); - 
assertThat(innerQuery, instanceOf(MultiPhrasePrefixQuery.class)); + assertThat(query, instanceOf(MultiPhrasePrefixQuery.class)); } { diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index 43c76f028e22e..27651e0da0de4 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -91,7 +91,12 @@ protected MultiMatchQueryBuilder doCreateTestQueryBuilder() { // sets other parameters of the multi match query if (randomBoolean()) { - query.type(randomFrom(MultiMatchQueryBuilder.Type.values())); + if (fieldName.equals(STRING_FIELD_NAME)) { + query.type(randomFrom(MultiMatchQueryBuilder.Type.values())); + } else { + query.type(randomValueOtherThan(MultiMatchQueryBuilder.Type.PHRASE_PREFIX, + () -> randomFrom(MultiMatchQueryBuilder.Type.values()))); + } } if (randomBoolean()) { query.operator(randomFrom(Operator.values())); @@ -384,6 +389,11 @@ public void testDefaultField() throws Exception { ), 0.0f ); assertEquals(expected, query); + + context.getIndexSettings().updateIndexMetaData( + newIndexMeta("index", context.getIndexSettings().getSettings(), + Settings.builder().putNull("index.query.default_field").build()) + ); } public void testWithStopWords() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java index baa0fed01bbf0..0eb6de7da252f 100644 --- a/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/QueryStringQueryBuilderTests.java @@ -1208,20 +1208,21 @@ public void testUnmappedFieldRewriteToMatchNoDocs() throws IOException { .field("unmapped_field") .lenient(true) 
.toQuery(createShardContext()); - assertEquals(new MatchNoDocsQuery(""), query); + assertEquals(new BooleanQuery.Builder().build(), query); // Unmapped prefix field query = new QueryStringQueryBuilder("unmapped_field:hello") .lenient(true) .toQuery(createShardContext()); - assertEquals(new MatchNoDocsQuery(""), query); + assertEquals(new BooleanQuery.Builder().build(), query); // Unmapped fields query = new QueryStringQueryBuilder("hello") .lenient(true) .field("unmapped_field") + .field("another_field") .toQuery(createShardContext()); - assertEquals(new MatchNoDocsQuery(""), query); + assertEquals(new BooleanQuery.Builder().build(), query); } public void testDefaultField() throws Exception { diff --git a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index 1087bbbf9fd8f..58baadd83573d 100644 --- a/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/server/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -27,7 +27,6 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; @@ -44,7 +43,7 @@ import org.elasticsearch.index.mapper.MockFieldMapper.FakeFieldType; import org.elasticsearch.index.query.MultiMatchQueryBuilder; import org.elasticsearch.index.query.QueryShardContext; -import org.elasticsearch.index.search.MultiMatchQuery.FieldAndFieldType; +import org.elasticsearch.index.search.MultiMatchQuery.FieldAndBoost; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.test.ESSingleNodeTestCase; import org.elasticsearch.test.MockKeywordPlugin; @@ -105,7 +104,8 @@ public void testCrossFieldMultiMatchQuery() throws 
IOException { for (float tieBreaker : new float[] {0.0f, 0.5f}) { Query parsedQuery = multiMatchQuery("banon") .field("name.first", 2) - .field("name.last", 3).field("foobar") + .field("name.last", 3) + .field("foobar") .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS) .tieBreaker(tieBreaker) .toQuery(queryShardContext); @@ -113,11 +113,7 @@ public void testCrossFieldMultiMatchQuery() throws IOException { Query rewrittenQuery = searcher.searcher().rewrite(parsedQuery); Query tq1 = new BoostQuery(new TermQuery(new Term("name.first", "banon")), 2); Query tq2 = new BoostQuery(new TermQuery(new Term("name.last", "banon")), 3); - Query expected = new DisjunctionMaxQuery( - Arrays.asList( - new MatchNoDocsQuery("unknown field foobar"), - new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker) - ), tieBreaker); + Query expected = new DisjunctionMaxQuery(Arrays.asList(tq2, tq1), tieBreaker); assertEquals(expected, rewrittenQuery); } } @@ -133,7 +129,7 @@ public void testBlendTerms() { Query expected = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); } @@ -149,7 +145,7 @@ public void testBlendTermsWithFieldBoosts() { Query expected = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); 
assertEquals(expected, actual); } @@ -171,7 +167,7 @@ public Query termQuery(Object value, QueryShardContext context) { ), 1f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, true, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, true, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); } @@ -185,7 +181,7 @@ public Query termQuery(Object value, QueryShardContext context) { ft.setName("bar"); expectThrows(IllegalArgumentException.class, () -> MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft, 1))); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft, 1)))); } public void testBlendNoTermQuery() { @@ -209,7 +205,7 @@ public Query termQuery(Object value, QueryShardContext context) { ), 1.0f); Query actual = MultiMatchQuery.blendTerm( indexService.newQueryShardContext(randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null), - new BytesRef("baz"), null, 1f, false, new FieldAndFieldType(ft1, 2), new FieldAndFieldType(ft2, 3)); + new BytesRef("baz"), null, 1f, false, Arrays.asList(new FieldAndBoost(ft1, 2), new FieldAndBoost(ft2, 3))); assertEquals(expected, actual); } From 8d58cadc26f839b330be328da2740ffdd77362ce Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 16 Jan 2019 09:36:36 +0100 Subject: [PATCH 2/5] address feedback --- .../AnnotatedTextFieldMapper.java | 16 +- .../SpanBooleanQueryRewriteWithMaxClause.java | 119 ++++++++ .../index/mapper/MappedFieldType.java | 10 +- .../index/mapper/TextFieldMapper.java | 41 ++- .../query/SpanMultiTermQueryBuilder.java | 155 +++-------- .../index/search/MatchQuery.java | 255 
++++++++++++++---- .../index/mapper/TextFieldMapperTests.java | 146 +++++----- .../query/SpanMultiTermQueryBuilderTests.java | 137 +++++----- 8 files changed, 552 insertions(+), 327 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index c49044e4b6b87..2aadfd2218590 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -33,8 +33,11 @@ import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.search.NormsFieldExistsQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -599,6 +602,14 @@ public Query existsQuery(QueryShardContext context) { } } + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + @Override public Query phraseQuery(TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { return 
TextFieldMapper.createPhraseQuery(stream, name(), slop, enablePositionIncrements); @@ -610,9 +621,8 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions, - boolean enablePositionIncrements) throws IOException { - return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions, enablePositionIncrements); + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + return TextFieldMapper.createPhrasePrefixQuery(stream, name(), slop, maxExpansions); } } diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java b/server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java new file mode 100644 index 0000000000000..e78770ed2a85a --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/SpanBooleanQueryRewriteWithMaxClause.java @@ -0,0 +1,119 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common.lucene.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.queries.SpanMatchNoDocsQuery; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiTermQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; + +/** + * A span rewrite method that extracts the first maxExpansions terms + * that match the {@link MultiTermQuery} in the terms dictionary. + * The rewrite throws an error if more than maxExpansions terms are found and hardLimit + * is set. 
+ */ +public class SpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod { + private final int maxExpansions; + private final boolean hardLimit; + + public SpanBooleanQueryRewriteWithMaxClause() { + this(BooleanQuery.getMaxClauseCount(), true); + } + + public SpanBooleanQueryRewriteWithMaxClause(int maxExpansions, boolean hardLimit) { + this.maxExpansions = maxExpansions; + this.hardLimit = hardLimit; + } + + public int getMaxExpansions() { + return maxExpansions; + } + + public boolean isHardLimit() { + return hardLimit; + } + + @Override + public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + final MultiTermQuery.RewriteMethod delegate = new MultiTermQuery.RewriteMethod() { + @Override + public Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException { + Collection queries = collectTerms(reader, query); + if (queries.size() == 0) { + return new SpanMatchNoDocsQuery(query.getField(), "no expansion found for " + query.toString()); + } else if (queries.size() == 1) { + return queries.iterator().next(); + } else { + return new SpanOrQuery(queries.toArray(new SpanQuery[0])); + } + } + + private Collection collectTerms(IndexReader reader, MultiTermQuery query) throws IOException { + Set queries = new HashSet<>(); + IndexReaderContext topReaderContext = reader.getContext(); + for (LeafReaderContext context : topReaderContext.leaves()) { + final Terms terms = context.reader().terms(query.getField()); + if (terms == null) { + // field does not exist + continue; + } + + final TermsEnum termsEnum = getTermsEnum(query, terms, null); + assert termsEnum != null; + + if (termsEnum == TermsEnum.EMPTY) + continue; + + BytesRef bytes; + while ((bytes = termsEnum.next()) != null) { + if (queries.size() >= maxExpansions) { + if (hardLimit) { + throw new RuntimeException("[" + query.toString() + " ] " + + "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + 
BooleanQuery.getMaxClauseCount() + "]"); + } else { + return queries; + } + } + queries.add(new SpanTermQuery(new Term(query.getField(), bytes))); + } + } + return queries; + } + }; + return (SpanQuery) delegate.rewrite(reader, query); + } +} diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index f806fc40125ad..f785e01125f69 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -35,6 +35,8 @@ import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Nullable; @@ -375,12 +377,16 @@ public Query multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi + "] which is of type [" + typeName() + "]"); } - public Query phrasePrefixQuery(TokenStream stream, int slop, - int maxExpansions, boolean enablePositionIncrements) throws IOException { + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { throw new IllegalArgumentException("Can only use phrase prefix queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + throw new IllegalArgumentException("Can only use span prefix queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + /** * Create an {@link IntervalsSource} to be used for proximity queries */ diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 1c638203a23c8..38ca2d49e72bb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -39,11 +39,13 @@ import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.NormsFieldExistsQuery; import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.intervals.IntervalsSource; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; @@ -605,6 +607,23 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, Quer return tq; } + @Override + public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRewriteMethod method, QueryShardContext context) { + failIfNotIndexed(); + if (prefixFieldType != null + && value.length() >= prefixFieldType.minChars + && value.length() <= prefixFieldType.maxChars + && prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { + + return new FieldMaskingSpanQuery(new SpanTermQuery(new Term(prefixFieldType.name(), indexedValueForSearch(value))), name()); + } else { + SpanMultiTermQueryWrapper spanMulti = + new SpanMultiTermQueryWrapper<>(new PrefixQuery(new Term(name(), indexedValueForSearch(value)))); + spanMulti.setRewriteMethod(method); + return spanMulti; + } + } + @Override public Query existsQuery(QueryShardContext context) { if (omitNorms()) { @@ -662,16 +681,15 @@ public Query 
multiPhraseQuery(TokenStream stream, int slop, boolean enablePositi } @Override - public Query phrasePrefixQuery(TokenStream stream, int slop, - int maxExpansions, boolean enablePositionIncrements) throws IOException { - return analyzePhrasePrefix(stream, slop, maxExpansions, enablePositionIncrements); + public Query phrasePrefixQuery(TokenStream stream, int slop, int maxExpansions) throws IOException { + return analyzePhrasePrefix(stream, slop, maxExpansions); } - private Query analyzePhrasePrefix(TokenStream stream, int slop, - int maxExpansions, boolean enablePositionIncrements) throws IOException { - MultiPhrasePrefixQuery query = createPhrasePrefixQuery(stream, name(), slop, maxExpansions, enablePositionIncrements); + private Query analyzePhrasePrefix(TokenStream stream, int slop, int maxExpansions) throws IOException { + final MultiPhrasePrefixQuery query = createPhrasePrefixQuery(stream, name(), slop, maxExpansions); - if (prefixFieldType == null + if (slop > 0 + || prefixFieldType == null || prefixFieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { return query; } @@ -943,8 +961,8 @@ public static Query createPhraseQuery(TokenStream stream, String field, int slop return mpqb.build(); } - public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, String field, int slop, int maxExpansions, - boolean enablePositionIncrements) throws IOException { + public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, String field, + int slop, int maxExpansions) throws IOException { MultiPhrasePrefixQuery builder = new MultiPhrasePrefixQuery(field); builder.setSlop(slop); builder.setMaxExpansions(maxExpansions); @@ -957,12 +975,11 @@ public static MultiPhrasePrefixQuery createPhrasePrefixQuery(TokenStream stream, stream.reset(); int position = -1; while (stream.incrementToken()) { - int posInc = enablePositionIncrements ? 
posIncrAtt.getPositionIncrement() : 1; - if (posInc != 0) { + if (posIncrAtt.getPositionIncrement() != 0) { if (currentTerms.isEmpty() == false) { builder.add(currentTerms.toArray(new Term[0]), position); } - position += posInc; + position += posIncrAtt.getPositionIncrement(); currentTerms.clear(); } currentTerms.add(new Term(field, termAtt.getBytesRef())); diff --git a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java index 22fca7d1d0b8f..49e5e53e1ed91 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilder.java @@ -18,31 +18,19 @@ */ package org.elasticsearch.index.query; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermStates; import org.apache.lucene.queries.SpanMatchNoDocsQuery; -import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiTermQuery; -import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.FieldMaskingSpanQuery; -import org.apache.lucene.search.ScoringRewrite; import org.apache.lucene.search.TopTermsRewrite; -import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import 
org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -50,8 +38,6 @@ import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; import java.util.Objects; /** @@ -138,126 +124,53 @@ public static SpanMultiTermQueryBuilder fromXContent(XContentParser parser) thro return new SpanMultiTermQueryBuilder(subQuery).queryName(queryName).boost(boost); } - static class TopTermSpanBooleanQueryRewriteWithMaxClause extends SpanMultiTermQueryWrapper.SpanRewriteMethod { - private final long maxExpansions; - - TopTermSpanBooleanQueryRewriteWithMaxClause() { - this.maxExpansions = BooleanQuery.getMaxClauseCount(); - } - - @Override - public SpanQuery rewrite(IndexReader reader, MultiTermQuery query) throws IOException { - final MultiTermQuery.RewriteMethod delegate = new ScoringRewrite>() { - @Override - protected List getTopLevelBuilder() { - return new ArrayList(); - } - - @Override - protected Query build(List builder) { - return new SpanOrQuery((SpanQuery[]) builder.toArray(new SpanQuery[builder.size()])); - } - - @Override - protected void checkMaxClauseCount(int count) { - if (count > maxExpansions) { - throw new RuntimeException("[" + query.toString() + " ] " + - "exceeds maxClauseCount [ Boolean maxClauseCount is set to " + BooleanQuery.getMaxClauseCount() + "]"); - } - } - - @Override - protected void addClause(List topLevel, Term term, int docCount, float boost, TermStates states) { - SpanTermQuery q = new SpanTermQuery(term, states); - topLevel.add(q); - } - }; - return (SpanQuery) delegate.rewrite(reader, query); - } - } - @Override protected Query doToQuery(QueryShardContext context) throws IOException { - Query subQuery = 
multiTermQueryBuilder.toQuery(context); - float boost = AbstractQueryBuilder.DEFAULT_BOOST; - while (true) { - if (subQuery instanceof ConstantScoreQuery) { - subQuery = ((ConstantScoreQuery) subQuery).getQuery(); - boost = 1; - } else if (subQuery instanceof BoostQuery) { - BoostQuery boostQuery = (BoostQuery) subQuery; - subQuery = boostQuery.getQuery(); - boost *= boostQuery.getBoost(); - } else { - break; - } - } - // no MultiTermQuery extends SpanQuery, so SpanBoostQuery is not supported here - assert subQuery instanceof SpanBoostQuery == false; - - if (subQuery instanceof MatchNoDocsQuery) { - return new SpanMatchNoDocsQuery(multiTermQueryBuilder.fieldName(), subQuery.toString()); - } - - final SpanQuery spanQuery; - if (subQuery instanceof TermQuery) { - /** - * Text fields that index prefixes can rewrite prefix queries - * into term queries. See {@link TextFieldMapper.TextFieldType#prefixQuery}. - */ - if (multiTermQueryBuilder.getClass() != PrefixQueryBuilder.class) { - throw new UnsupportedOperationException("unsupported inner query generated by " + - multiTermQueryBuilder.getClass().getName() + ", should be " + MultiTermQuery.class.getName() - + " but was " + subQuery.getClass().getName()); - } - + if (multiTermQueryBuilder instanceof PrefixQueryBuilder) { PrefixQueryBuilder prefixBuilder = (PrefixQueryBuilder) multiTermQueryBuilder; - MappedFieldType fieldType = context.fieldMapper(prefixBuilder.fieldName()); - String fieldName = fieldType != null ? fieldType.name() : prefixBuilder.fieldName(); - - if (context.getIndexSettings().getIndexVersionCreated().before(Version.V_6_4_0)) { - /** - * Indices created in this version do not index positions on the prefix field - * so we cannot use it to match positional queries. Instead, we explicitly create the prefix - * query on the main field to avoid the rewrite. 
- */ - PrefixQuery prefixQuery = new PrefixQuery(new Term(fieldName, prefixBuilder.value())); - if (prefixBuilder.rewrite() != null) { - MultiTermQuery.RewriteMethod rewriteMethod = - QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE); - prefixQuery.setRewriteMethod(rewriteMethod); + MappedFieldType fieldType = context.fieldMapper(multiTermQueryBuilder.fieldName()); + if (fieldType == null) { + return new SpanMatchNoDocsQuery(multiTermQueryBuilder.fieldName(), "unknown field"); + } + final SpanMultiTermQueryWrapper.SpanRewriteMethod spanRewriteMethod; + if (prefixBuilder.rewrite() != null) { + MultiTermQuery.RewriteMethod rewriteMethod = + QueryParsers.parseRewriteMethod(prefixBuilder.rewrite(), null, LoggingDeprecationHandler.INSTANCE); + if (rewriteMethod instanceof TopTermsRewrite) { + TopTermsRewrite innerRewrite = (TopTermsRewrite) rewriteMethod; + spanRewriteMethod = new SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite(innerRewrite.getSize()); + } else { + spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(); } - subQuery = prefixQuery; - spanQuery = new SpanMultiTermQueryWrapper<>(prefixQuery); } else { - /** - * Prefixes are indexed in a different field so we mask the term query with the original field - * name. This is required because span_near and span_or queries don't work across different field. - * The masking is safe because the prefix field is indexed using the same content than the original field - * and the prefix analyzer preserves positions. 
- */ - SpanTermQuery spanTermQuery = new SpanTermQuery(((TermQuery) subQuery).getTerm()); - spanQuery = new FieldMaskingSpanQuery(spanTermQuery, fieldName); + spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(); } + return fieldType.spanPrefixQuery(prefixBuilder.value(), spanRewriteMethod, context); } else { - if (subQuery instanceof MultiTermQuery == false) { + Query subQuery = multiTermQueryBuilder.toQuery(context); + while (true) { + if (subQuery instanceof ConstantScoreQuery) { + subQuery = ((ConstantScoreQuery) subQuery).getQuery(); + } else if (subQuery instanceof BoostQuery) { + BoostQuery boostQuery = (BoostQuery) subQuery; + subQuery = boostQuery.getQuery(); + } else { + break; + } + } + if (subQuery instanceof MatchNoDocsQuery) { + return new SpanMatchNoDocsQuery(multiTermQueryBuilder.fieldName(), subQuery.toString()); + } else if (subQuery instanceof MultiTermQuery == false) { throw new UnsupportedOperationException("unsupported inner query, should be " + MultiTermQuery.class.getName() + " but was " + subQuery.getClass().getName()); } - spanQuery = new SpanMultiTermQueryWrapper<>((MultiTermQuery) subQuery); - } - if (subQuery instanceof MultiTermQuery) { MultiTermQuery multiTermQuery = (MultiTermQuery) subQuery; - SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) spanQuery; + SpanMultiTermQueryWrapper wrapper = new SpanMultiTermQueryWrapper<>(multiTermQuery); if (multiTermQuery.getRewriteMethod() instanceof TopTermsRewrite == false) { - wrapper.setRewriteMethod(new TopTermSpanBooleanQueryRewriteWithMaxClause()); + wrapper.setRewriteMethod(new SpanBooleanQueryRewriteWithMaxClause()); } + return wrapper; } - if (boost != AbstractQueryBuilder.DEFAULT_BOOST) { - return new SpanBoostQuery(spanQuery, boost); - } - - return spanQuery; } @Override diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index c9683fd94af75..ad4b267eef643 100644 
--- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -36,22 +36,29 @@ import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.util.QueryBuilder; import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; import org.elasticsearch.ElasticsearchException; -import org.elasticsearch.common.CheckedFunction; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; +import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.ArrayList; import java.util.Iterator; +import java.util.List; import java.util.function.Supplier; import static org.elasticsearch.common.lucene.search.Queries.newLenientFieldQuery; @@ -124,19 +131,10 @@ public void writeTo(StreamOutput out) throws IOException { } } - /** - * the default phrase slop - */ public static final int DEFAULT_PHRASE_SLOP = 0; - /** - * the default leniency setting - */ public static final boolean DEFAULT_LENIENCY = false; - /** - * the default zero terms query - */ public static final ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = ZeroTermsQuery.NONE; protected final 
QueryShardContext context; @@ -155,6 +153,9 @@ public void writeTo(StreamOutput out) throws IOException { protected int maxExpansions = FuzzyQuery.defaultMaxExpansions; + protected SpanMultiTermQueryWrapper.SpanRewriteMethod spanRewriteMethod = + new SpanBooleanQueryRewriteWithMaxClause(FuzzyQuery.defaultMaxExpansions, false); + protected boolean transpositions = FuzzyQuery.defaultTranspositions; protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod; @@ -208,6 +209,7 @@ public void setFuzzyPrefixLength(int fuzzyPrefixLength) { public void setMaxExpansions(int maxExpansions) { this.maxExpansions = maxExpansions; + this.spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(maxExpansions, false); } public void setTranspositions(boolean transpositions) { @@ -344,44 +346,22 @@ class MatchQueryBuilder extends QueryBuilder { setEnablePositionIncrements(enablePositionIncrements); } - /** - * Checks if graph analysis should be enabled for the field depending - * on the provided {@link Analyzer} - */ @Override protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, int slop) { assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; - return createQuery(field, queryText, source -> createFieldQuery(source, operator, field, quoted, slop)); + Type type = quoted ? Type.PHRASE : Type.BOOLEAN; + return createQuery(field, queryText, type, operator, slop); } public Query createPhrasePrefixQuery(String field, String queryText, int slop) { - return createQuery(field, queryText, source -> createPhrasePrefixQuery(source, field, slop)); + return createQuery(field, queryText, Type.PHRASE_PREFIX, occur, slop); } - private Query createQuery(String field, String queryText, CheckedFunction queryFunc) { - // Use the analyzer to get all the tokens, and then build an appropriate - // query based on the analysis chain. 
- try (TokenStream source = analyzer.tokenStream(field, queryText)) { - if (source.hasAttribute(DisableGraphAttribute.class)) { - /* - * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid - * paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. - */ - setEnableGraphQueries(false); - } - try { - return queryFunc.apply(source); - } finally { - setEnableGraphQueries(true); - } - } catch (IOException e) { - throw new RuntimeException("Error analyzing query text", e); - } - } + private Query createFieldQuery(TokenStream source, Type type, BooleanClause.Occur operator, String field, int phraseSlop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; - private Query createPhrasePrefixQuery(TokenStream source, String field, int slop) { - // Build an appropriate phrase prefix query based on the analysis chain. + // Build an appropriate query based on the analysis chain. try (CachingTokenFilter stream = new CachingTokenFilter(source)) { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); @@ -392,8 +372,12 @@ private Query createPhrasePrefixQuery(TokenStream source, String field, int slop return null; } + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. 
+ int numTokens = 0; int positionCount = 0; + boolean hasSynonyms = false; boolean isGraph = false; stream.reset(); @@ -402,6 +386,8 @@ private Query createPhrasePrefixQuery(TokenStream source, String field, int slop int positionIncrement = posIncAtt.getPositionIncrement(); if (positionIncrement != 0) { positionCount += positionIncrement; + } else { + hasSynonyms = true; } int positionLength = posLenAtt.getPositionLength(); @@ -412,21 +398,119 @@ private Query createPhrasePrefixQuery(TokenStream source, String field, int slop // phase 2: based on token count, presence of synonyms, and options // formulate a single term, boolean, or phrase. - if (numTokens == 0) { return null; + } else if (numTokens == 1) { + // single term + if (type == Type.PHRASE_PREFIX) { + return analyzePhrasePrefix(field, stream, phraseSlop, positionCount); + } else { + return analyzeTerm(field, stream); + } } else if (isGraph) { // graph - return analyzeGraphPhrasePrefix(stream, field, slop); + if (type == Type.PHRASE || type == Type.PHRASE_PREFIX) { + return analyzeGraphPhrase(stream, field, type, phraseSlop); + } else { + return analyzeGraphBoolean(field, stream, operator); + } + } else if (type == Type.PHRASE && positionCount > 1) { + // phrase + if (hasSynonyms) { + // complex phrase with synonyms + return analyzeMultiPhrase(field, stream, phraseSlop); + } else { + // simple phrase + return analyzePhrase(field, stream, phraseSlop); + } + } else if (type == Type.PHRASE_PREFIX) { + // phrase prefix + return analyzePhrasePrefix(field, stream, phraseSlop, positionCount); } else { - // single position - return analyzePhrasePrefix(field, stream, slop, positionCount); + // boolean + if (positionCount == 1) { + // only one position, with synonyms + return analyzeBoolean(field, stream); + } else { + // complex case: multiple positions + return analyzeMultiBoolean(field, stream, operator); + } } } catch (IOException e) { throw new RuntimeException("Error analyzing query text", e); } } + private 
Query createQuery(String field, String queryText, Type type, BooleanClause.Occur operator, int phraseSlop) { + // Use the analyzer to get all the tokens, and then build an appropriate + // query based on the analysis chain. + try (TokenStream source = analyzer.tokenStream(field, queryText)) { + if (source.hasAttribute(DisableGraphAttribute.class)) { + /* + * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid + * paths explosion. See {@link org.elasticsearch.index.analysis.ShingleTokenFilterFactory} for details. + */ + setEnableGraphQueries(false); + } + try { + return createFieldQuery(source, type, operator, field, phraseSlop); + } finally { + setEnableGraphQueries(true); + } + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); + } + } + + private SpanQuery newSpanQuery(Term[] terms, boolean prefix) { + if (terms.length == 1) { + return prefix ? fieldType.spanPrefixQuery(terms[0].text(), spanRewriteMethod, context) : new SpanTermQuery(terms[0]); + } + SpanQuery[] spanQueries = new SpanQuery[terms.length]; + for (int i = 0; i < terms.length; i++) { // prefix: expand every alternative as a prefix, same as the single-term case above + spanQueries[i] = prefix ?
fieldType.spanPrefixQuery(terms[i].text(), spanRewriteMethod, context) : + new SpanTermQuery(terms[i]); + } + return new SpanOrQuery(spanQueries); + } + + @Override + protected SpanQuery createSpanQuery(TokenStream in, String field) throws IOException { + return createSpanQuery(in, field, false); + } + + private SpanQuery createSpanQuery(TokenStream in, String field, boolean prefix) throws IOException { + TermToBytesRefAttribute termAtt = in.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = in.getAttribute(PositionIncrementAttribute.class); + if (termAtt == null) { + return null; + } + + SpanNearQuery.Builder builder = new SpanNearQuery.Builder(field, true); + Term lastTerm = null; + while (in.incrementToken()) { + if (posIncAtt.getPositionIncrement() > 1) { + builder.addGap(posIncAtt.getPositionIncrement()-1); + } + if (lastTerm != null) { + builder.addClause(new SpanTermQuery(lastTerm)); + } + lastTerm = new Term(field, termAtt.getBytesRef()); + } + if (lastTerm != null) { + SpanQuery spanQuery = prefix ?
+ fieldType.spanPrefixQuery(lastTerm.text(), spanRewriteMethod, context) : new SpanTermQuery(lastTerm); + builder.addClause(spanQuery); + } + SpanNearQuery query = builder.build(); + SpanQuery[] clauses = query.getClauses(); + if (clauses.length == 1) { + return clauses[0]; + } else { + return query; + } + } + @Override protected Query newTermQuery(Term term) { Supplier querySupplier; @@ -484,7 +568,7 @@ private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, in if (positionCount > 1) { checkForPositions(field); } - return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, enablePositionIncrements); + return fieldType.phrasePrefixQuery(stream, slop, maxExpansions); } catch (IllegalArgumentException | IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); @@ -493,22 +577,87 @@ private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, in } } - private Query analyzeGraphPhrasePrefix(TokenStream source, String field, int slop) throws IOException { + private Query analyzeGraphPhrase(TokenStream source, String field, Type type, int slop) throws IOException { + assert type == Type.PHRASE_PREFIX || type == Type.PHRASE; + source.reset(); GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + if (phraseSlop > 0) { + /* + * Creates a boolean query from the graph token stream by extracting all the finite strings from the graph + * and using them to create phrase queries with the appropriate slop. 
+ */ + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + Iterator it = graph.getFiniteStrings(); + while (it.hasNext()) { + Query query = createFieldQuery(it.next(), type, BooleanClause.Occur.MUST, field, slop); + if (query != null) { + builder.add(query, BooleanClause.Occur.SHOULD); + } + } + return builder.build(); + } + /* - * Creates a boolean query from the graph token stream by extracting all the finite strings from the graph - * and using them to create phrase prefix queries with the appropriate slop. + * Creates a span near (phrase) query from a graph token stream. + * The articulation points of the graph are visited in order and the queries + * created at each point are merged in the returned near query. */ - BooleanQuery.Builder builder = new BooleanQuery.Builder(); - Iterator it = graph.getFiniteStrings(); - while (it.hasNext()) { - Query query = createPhrasePrefixQuery(it.next(), field, slop); - if (query != null) { - builder.add(query, BooleanClause.Occur.SHOULD); + List clauses = new ArrayList<>(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + int maxClauseCount = BooleanQuery.getMaxClauseCount(); + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + final SpanQuery queryPos; + boolean endPrefix = end == -1 && type == Type.PHRASE_PREFIX; + if (graph.hasSidePath(start)) { + List queries = new ArrayList<>(); + Iterator it = graph.getFiniteStrings(start, end); + while (it.hasNext()) { + TokenStream ts = it.next(); + SpanQuery q = createSpanQuery(ts, field, endPrefix); + if (q != null) { + if (queries.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + queries.add(q); + } + } + if (queries.size() > 0) { + queryPos = new SpanOrQuery(queries.toArray(new SpanQuery[0])); + } else { + queryPos = null; + } + } else { + Term[] terms = graph.getTerms(field, 
start); + assert terms.length > 0; + if (terms.length >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + queryPos = newSpanQuery(terms, endPrefix); + } + + if (queryPos != null) { + if (clauses.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + clauses.add(queryPos); } } - return builder.build(); + + if (clauses.isEmpty()) { + return null; + } else if (clauses.size() == 1) { + return clauses.get(0); + } else { + return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true); + } } private void checkForPositions(String field) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java index 63d30132ac13f..e527f98f73c20 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/TextFieldMapperTests.java @@ -994,70 +994,94 @@ public void testFastPhrasePrefixes() throws IOException { queryShardContext.getMapperService().merge("type", new CompressedXContent(mapping), MergeReason.MAPPING_UPDATE); - Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); - Query expected = new SpanNearQuery.Builder("field", true) - .addClause(new SpanTermQuery(new Term("field", "two"))) - .addClause(new FieldMaskingSpanQuery( - new SpanTermQuery(new Term("field._index_prefix", "words")), "field") - ) - .build(); - assertThat(q, equalTo(expected)); - - Query q2 = new MatchPhrasePrefixQueryBuilder("field", "three words here").toQuery(queryShardContext); - expected = new SpanNearQuery.Builder("field", true) - .addClause(new SpanTermQuery(new Term("field", "three"))) - .addClause(new SpanTermQuery(new Term("field", "words"))) - .addClause(new FieldMaskingSpanQuery( - new SpanTermQuery(new Term("field._index_prefix", "here")), "field") - ) - .build(); - assertThat(q2, equalTo(expected)); - - Query q3 = new 
MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); - expected = new SpanNearQuery.Builder("field", true) - .setSlop(1) - .addClause(new SpanTermQuery(new Term("field", "two"))) - .addClause(new FieldMaskingSpanQuery( - new SpanTermQuery(new Term("field._index_prefix", "words")), "field") - ) - .build(); - assertThat(q3, equalTo(expected)); - - Query q4 = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); - assertThat(q4, is(new SynonymQuery(new Term("field._index_prefix", "singleton")))); - - Query q5 = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); - expected = new SpanNearQuery.Builder("field", true) - .addClause(new SpanTermQuery(new Term("field", "sparkle"))) - .addGap(1) - .addClause(new FieldMaskingSpanQuery( - new SpanTermQuery(new Term("field._index_prefix", "stopword")), "field") - ) - .build(); - assertThat(q5, equalTo(expected)); + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "two"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "words")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + } - MatchQuery matchQuery = new MatchQuery(queryShardContext); - matchQuery.setAnalyzer(new MockSynonymAnalyzer()); - Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); - expected = new SpanNearQuery.Builder("synfield", true) - .addClause(new SpanTermQuery(new Term("synfield", "motor"))) - .addClause( - new SpanOrQuery( - new FieldMaskingSpanQuery( - new SpanTermQuery(new Term("synfield._index_prefix", "dogs")), "synfield" - ), - new FieldMaskingSpanQuery( - new SpanTermQuery(new Term("synfield._index_prefix", "dog")), "synfield" + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "three words 
here").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "three"))) + .addClause(new SpanTermQuery(new Term("field", "words"))) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "here")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "two words").slop(1).toQuery(queryShardContext); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); + mpq.setSlop(1); + mpq.add(new Term("field", "two")); + mpq.add(new Term("field", "words")); + assertThat(q, equalTo(mpq)); + } + + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "singleton").toQuery(queryShardContext); + assertThat(q, is(new SynonymQuery(new Term("field._index_prefix", "singleton")))); + } + + { + + Query q = new MatchPhrasePrefixQueryBuilder("field", "sparkle a stopword").toQuery(queryShardContext); + Query expected = new SpanNearQuery.Builder("field", true) + .addClause(new SpanTermQuery(new Term("field", "sparkle"))) + .addGap(1) + .addClause(new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("field._index_prefix", "stopword")), "field") + ) + .build(); + assertThat(q, equalTo(expected)); + } + + { + MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "motor dogs"); + Query expected = new SpanNearQuery.Builder("synfield", true) + .addClause(new SpanTermQuery(new Term("synfield", "motor"))) + .addClause( + new SpanOrQuery( + new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("synfield._index_prefix", "dogs")), "synfield" + ), + new FieldMaskingSpanQuery( + new SpanTermQuery(new Term("synfield._index_prefix", "dog")), "synfield" + ) ) ) - ) - .build(); - assertThat(q6, equalTo(expected)); + .build(); + assertThat(q, equalTo(expected)); + } + + { + 
MatchQuery matchQuery = new MatchQuery(queryShardContext); + matchQuery.setPhraseSlop(1); + matchQuery.setAnalyzer(new MockSynonymAnalyzer()); + Query q = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "synfield", "two dogs"); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("synfield"); + mpq.setSlop(1); + mpq.add(new Term("synfield", "two")); + mpq.add(new Term[] { new Term("synfield", "dogs"), new Term("synfield", "dog") }); + assertThat(q, equalTo(mpq)); + } - Query q7 = matchQuery.parse(MatchQuery.Type.PHRASE_PREFIX, "field", "motor d"); - MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); - mpq.add(new Term("field", "motor")); - mpq.add(new Term("field", "d")); - assertThat(q7, equalTo(mpq)); + { + Query q = new MatchPhrasePrefixQueryBuilder("field", "motor d").toQuery(queryShardContext); + MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery("field"); + mpq.add(new Term("field", "motor")); + mpq.add(new Term("field", "d")); + assertThat(q, equalTo(mpq)); + } } } diff --git a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java index 47db7d42d8cd0..4c59e25804a55 100644 --- a/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/SpanMultiTermQueryBuilderTests.java @@ -32,8 +32,8 @@ import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopTermsRewrite; import org.apache.lucene.search.spans.FieldMaskingSpanQuery; -import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; @@ -42,6 +42,7 @@ import org.elasticsearch.common.Strings; import 
org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.search.internal.SearchContext; @@ -55,6 +56,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.hamcrest.CoreMatchers.either; +import static org.hamcrest.CoreMatchers.startsWith; public class SpanMultiTermQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -68,6 +70,9 @@ protected void initializeAdditionalMappings(MapperService mapperService) throws .field("type", "alias") .field("path", "prefix_field") .endObject() + .startObject("body") + .field("type", "text") + .endObject() .endObject().endObject().endObject(); mapperService.merge("_doc", @@ -85,23 +90,26 @@ protected void doAssertLuceneQuery(SpanMultiTermQueryBuilder queryBuilder, Query if (query instanceof SpanMatchNoDocsQuery) { return; } - if (queryBuilder.innerQuery().boost() != AbstractQueryBuilder.DEFAULT_BOOST) { - assertThat(query, instanceOf(SpanBoostQuery.class)); - SpanBoostQuery boostQuery = (SpanBoostQuery) query; - assertThat(boostQuery.getBoost(), equalTo(queryBuilder.innerQuery().boost())); - query = boostQuery.getQuery(); - } - assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class)); - SpanMultiTermQueryWrapper spanMultiTermQueryWrapper = (SpanMultiTermQueryWrapper) query; - Query multiTermQuery = queryBuilder.innerQuery().toQuery(context.getQueryShardContext()); - if (queryBuilder.innerQuery().boost() != AbstractQueryBuilder.DEFAULT_BOOST) { - assertThat(multiTermQuery, instanceOf(BoostQuery.class)); - BoostQuery boostQuery = (BoostQuery) multiTermQuery; - multiTermQuery = boostQuery.getQuery(); + assertThat(query, 
either(instanceOf(SpanMultiTermQueryWrapper.class)).or(instanceOf(FieldMaskingSpanQuery.class))); + if (query instanceof SpanMultiTermQueryWrapper) { + SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query; + Query innerQuery = queryBuilder.innerQuery().toQuery(context.getQueryShardContext()); + if (queryBuilder.innerQuery().boost() != AbstractQueryBuilder.DEFAULT_BOOST) { + assertThat(innerQuery, instanceOf(BoostQuery.class)); + BoostQuery boostQuery = (BoostQuery) innerQuery; + innerQuery = boostQuery.getQuery(); + } + assertThat(innerQuery, instanceOf(MultiTermQuery.class)); + MultiTermQuery multiQuery = (MultiTermQuery) innerQuery; + if (multiQuery.getRewriteMethod() instanceof TopTermsRewrite) { + assertThat(wrapper.getRewriteMethod(), instanceOf(SpanMultiTermQueryWrapper.TopTermsSpanBooleanQueryRewrite.class)); + } else { + assertThat(wrapper.getRewriteMethod(), instanceOf(SpanBooleanQueryRewriteWithMaxClause.class)); + } + } else if (query instanceof FieldMaskingSpanQuery) { + FieldMaskingSpanQuery mask = (FieldMaskingSpanQuery) query; + assertThat(mask.getMaskedQuery(), instanceOf(TermQuery.class)); } - assertThat(multiTermQuery, either(instanceOf(MultiTermQuery.class)).or(instanceOf(TermQuery.class))); - assertThat(spanMultiTermQueryWrapper.getWrappedQuery(), - equalTo(new SpanMultiTermQueryWrapper<>((MultiTermQuery) multiTermQuery).getWrappedQuery())); } public void testIllegalArgument() { @@ -168,11 +176,10 @@ public String fieldName() { */ public void testUnsupportedInnerQueryType() throws IOException { MultiTermQueryBuilder query = new TermMultiTermQueryBuilder(); - SpanMultiTermQueryBuilder spamMultiTermQuery = new SpanMultiTermQueryBuilder(query); + SpanMultiTermQueryBuilder spanMultiTermQuery = new SpanMultiTermQueryBuilder(query); UnsupportedOperationException e = expectThrows(UnsupportedOperationException.class, - () -> spamMultiTermQuery.toQuery(createShardContext())); - assertThat(e.getMessage(), containsString("unsupported inner 
query generated by " + TermMultiTermQueryBuilder.class.getName() + - ", should be " + MultiTermQuery.class.getName())); + () -> spanMultiTermQuery.toQuery(createShardContext())); + assertThat(e.getMessage(), startsWith("unsupported inner query")); } public void testToQueryInnerSpanMultiTerm() throws IOException { @@ -184,50 +191,39 @@ public void testToQueryInnerSpanMultiTerm() throws IOException { public void testToQueryInnerTermQuery() throws IOException { String fieldName = randomFrom("prefix_field", "prefix_field_alias"); final QueryShardContext context = createShardContext(); - if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) { - Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .toQuery(context); - assertThat(query, instanceOf(FieldMaskingSpanQuery.class)); - FieldMaskingSpanQuery fieldSpanQuery = (FieldMaskingSpanQuery) query; - assertThat(fieldSpanQuery.getField(), equalTo("prefix_field")); - assertThat(fieldSpanQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class)); - SpanTermQuery spanTermQuery = (SpanTermQuery) fieldSpanQuery.getMaskedQuery(); - assertThat(spanTermQuery.getTerm().text(), equalTo("foo")); - - query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .boost(2.0f) - .toQuery(context); - assertThat(query, instanceOf(SpanBoostQuery.class)); - SpanBoostQuery boostQuery = (SpanBoostQuery) query; - assertThat(boostQuery.getBoost(), equalTo(2.0f)); - assertThat(boostQuery.getQuery(), instanceOf(FieldMaskingSpanQuery.class)); - fieldSpanQuery = (FieldMaskingSpanQuery) boostQuery.getQuery(); - assertThat(fieldSpanQuery.getField(), equalTo("prefix_field")); - assertThat(fieldSpanQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class)); - spanTermQuery = (SpanTermQuery) fieldSpanQuery.getMaskedQuery(); - assertThat(spanTermQuery.getTerm().text(), equalTo("foo")); - } else { - Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, 
"foo")) - .toQuery(context); + { + Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")).toQuery(context); + if (context.getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_4_0)) { + assertThat(query, instanceOf(FieldMaskingSpanQuery.class)); + FieldMaskingSpanQuery fieldQuery = (FieldMaskingSpanQuery) query; + assertThat(fieldQuery.getMaskedQuery(), instanceOf(SpanTermQuery.class)); + assertThat(fieldQuery.getField(), equalTo("prefix_field")); + SpanTermQuery termQuery = (SpanTermQuery) fieldQuery.getMaskedQuery(); + assertThat(termQuery.getTerm().field(), equalTo("prefix_field._index_prefix")); + assertThat(termQuery.getTerm().text(), equalTo("foo")); + } else { + assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class)); + SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query; + assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class)); + PrefixQuery prefixQuery = (PrefixQuery) wrapper.getWrappedQuery(); + assertThat(prefixQuery.getField(), equalTo("prefix_field")); + assertThat(prefixQuery.getPrefix().text(), equalTo("foo")); + } + } + + { + Query query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "f")).toQuery(context); assertThat(query, instanceOf(SpanMultiTermQueryWrapper.class)); SpanMultiTermQueryWrapper wrapper = (SpanMultiTermQueryWrapper) query; assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class)); + assertThat(wrapper.getField(), equalTo("prefix_field")); PrefixQuery prefixQuery = (PrefixQuery) wrapper.getWrappedQuery(); assertThat(prefixQuery.getField(), equalTo("prefix_field")); - assertThat(prefixQuery.getPrefix().text(), equalTo("foo")); - - query = new SpanMultiTermQueryBuilder(new PrefixQueryBuilder(fieldName, "foo")) - .boost(2.0f) - .toQuery(context); - assertThat(query, instanceOf(SpanBoostQuery.class)); - SpanBoostQuery boostQuery = (SpanBoostQuery) query; - assertThat(boostQuery.getBoost(), equalTo(2.0f)); - 
assertThat(boostQuery.getQuery(), instanceOf(SpanMultiTermQueryWrapper.class)); - wrapper = (SpanMultiTermQueryWrapper) boostQuery.getQuery(); - assertThat(wrapper.getWrappedQuery(), instanceOf(PrefixQuery.class)); - prefixQuery = (PrefixQuery) wrapper.getWrappedQuery(); - assertThat(prefixQuery.getField(), equalTo("prefix_field")); - assertThat(prefixQuery.getPrefix().text(), equalTo("foo")); + assertThat(prefixQuery.getPrefix().text(), equalTo("f")); + assertThat(wrapper.getRewriteMethod(), instanceOf(SpanBooleanQueryRewriteWithMaxClause.class)); + SpanBooleanQueryRewriteWithMaxClause rewrite = (SpanBooleanQueryRewriteWithMaxClause) wrapper.getRewriteMethod(); + assertThat(rewrite.getMaxExpansions(), equalTo(BooleanQuery.getMaxClauseCount())); + assertTrue(rewrite.isHardLimit()); } } @@ -255,17 +251,13 @@ public void testFromJson() throws IOException { } public void testDefaultMaxRewriteBuilder() throws Exception { - Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("foo", "b")). 
- toQuery(createShardContext()); - - if (query instanceof SpanBoostQuery) { - query = ((SpanBoostQuery)query).getQuery(); - } + Query query = QueryBuilders.spanMultiTermQueryBuilder(QueryBuilders.prefixQuery("body", "b")) + .toQuery(createShardContext()); assertTrue(query instanceof SpanMultiTermQueryWrapper); if (query instanceof SpanMultiTermQueryWrapper) { - MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod(); - assertTrue(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause); + MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper) query).getRewriteMethod(); + assertTrue(rewriteMethod instanceof SpanBooleanQueryRewriteWithMaxClause); } } @@ -285,7 +277,6 @@ public void testTermExpansionExceptionOnSpanFailure() throws Exception { Query query = queryBuilder.toQuery(createShardContext(reader)); RuntimeException exc = expectThrows(RuntimeException.class, () -> query.rewrite(reader)); assertThat(exc.getMessage(), containsString("maxClauseCount")); - } finally { BooleanQuery.setMaxClauseCount(origBoolMaxClauseCount); } @@ -296,17 +287,13 @@ public void testTermExpansionExceptionOnSpanFailure() throws Exception { public void testTopNMultiTermsRewriteInsideSpan() throws Exception { Query query = QueryBuilders.spanMultiTermQueryBuilder( - QueryBuilders.prefixQuery("foo", "b").rewrite("top_terms_boost_2000") + QueryBuilders.prefixQuery("body", "b").rewrite("top_terms_boost_2000") ).toQuery(createShardContext()); - if (query instanceof SpanBoostQuery) { - query = ((SpanBoostQuery)query).getQuery(); - } - assertTrue(query instanceof SpanMultiTermQueryWrapper); if (query instanceof SpanMultiTermQueryWrapper) { MultiTermQuery.RewriteMethod rewriteMethod = ((SpanMultiTermQueryWrapper)query).getRewriteMethod(); - assertFalse(rewriteMethod instanceof SpanMultiTermQueryBuilder.TopTermSpanBooleanQueryRewriteWithMaxClause); + assertFalse(rewriteMethod instanceof 
SpanBooleanQueryRewriteWithMaxClause); } } From 5e94ad0c756405d737fdcb67e4b6c23657ac5feb Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 16 Jan 2019 09:39:26 +0100 Subject: [PATCH 3/5] ensure that field is not null in MultiPhrasePrefixQuery --- .../common/lucene/search/MultiPhrasePrefixQuery.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index 06b7774764050..57f60add714a1 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -39,6 +39,7 @@ import java.util.Iterator; import java.util.List; import java.util.ListIterator; +import java.util.Objects; public class MultiPhrasePrefixQuery extends Query { @@ -50,7 +51,7 @@ public class MultiPhrasePrefixQuery extends Query { private int slop = 0; public MultiPhrasePrefixQuery(String field) { - this.field = field; + this.field = Objects.requireNonNull(field); } /** From 1217e8ece6f8f3575bc5bffc0f4775bfad119ed8 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 16 Jan 2019 22:43:57 +0100 Subject: [PATCH 4/5] apply boost when needed --- .../elasticsearch/index/search/MultiMatchQuery.java | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index ed883609426b4..7eefaadaadde2 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -136,6 +136,7 @@ private List buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map buildCrossFieldQuery(MultiMatchQueryBuilder.Type type, Map disjunctions = new 
ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { Query query = fieldType.fieldType.phraseQuery(stream, slop, enablePositionIncrements); - if (fieldType.boost != AbstractQueryBuilder.DEFAULT_BOOST) { + if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } disjunctions.add(query); @@ -194,7 +202,7 @@ protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) t List disjunctions = new ArrayList<>(); for (FieldAndBoost fieldType : blendedFields) { Query query = fieldType.fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements); - if (fieldType.boost != AbstractQueryBuilder.DEFAULT_BOOST) { + if (fieldType.boost != 1f) { query = new BoostQuery(query, fieldType.boost); } disjunctions.add(query); From 22829c60c37330076f92556d51472922b99f91c5 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 16 Jan 2019 23:32:01 +0100 Subject: [PATCH 5/5] address more feedback --- .../java/org/elasticsearch/index/mapper/TextFieldMapper.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 38ca2d49e72bb..e5fc470e130bc 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; @@ -749,6 +750,7 @@ private Query analyzePhrasePrefix(TokenStream stream, int slop, int maxExpansion } private static boolean hasGaps(TokenStream stream) throws IOException { + assert stream instanceof CachingTokenFilter; PositionIncrementAttribute posIncAtt = 
stream.getAttribute(PositionIncrementAttribute.class); stream.reset(); while (stream.incrementToken()) {