diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index f4120bea5372a..571c49be11bb6 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermInSetQuery; @@ -38,6 +39,8 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.joda.DateMathParser; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.lucene.all.AllTermQuery; import org.elasticsearch.common.unit.Fuzziness; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -45,7 +48,6 @@ import org.elasticsearch.index.query.QueryRewriteContext; import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.QueryShardException; -import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.search.DocValueFormat; import org.joda.time.DateTimeZone; @@ -385,11 +387,11 @@ public Query nullValueQuery() { public abstract Query existsQuery(QueryShardContext context); public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + throw new IllegalArgumentException("Attempted to build a phrase query with multiple terms against non-text field [" + name + "]"); } public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + throw new IllegalArgumentException("Attempted to build a phrase query with multiple terms against non-text field [" + name + "]"); } /** diff --git a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java index 37834b93a1e0f..b19afe34bbe93 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/StringFieldType.java @@ -19,9 +19,17 @@ package org.elasticsearch.index.mapper; +import java.io.IOException; +import java.util.ArrayList; import java.util.List; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.shingle.FixedShingleFilter; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.Term; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiTermQuery; @@ -93,4 +101,64 @@ public Query rangeQuery(Object lowerTerm, Object upperTerm, boolean includeLower upperTerm == null ? null : indexedValueForSearch(upperTerm), includeLower, includeUpper); } + + @Override + public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { + + PhraseQuery.Builder builder = new PhraseQuery.Builder(); + builder.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + stream.reset(); + while (stream.incrementToken()) { + if (enablePosIncrements) { + position += posIncrAtt.getPositionIncrement(); + } + else { + position += 1; + } + builder.add(new Term(field, termAtt.getBytesRef()), position); + } + + return builder.build(); + } + + @Override + public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { + + MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); + mpqb.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + List multiTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + int positionIncrement = posIncrAtt.getPositionIncrement(); + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, termAtt.getBytesRef())); + } + + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + return mpqb.build(); + } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index d3ac943c088d7..c4743d97dbff7 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -43,6 +43,7 @@ import org.apache.lucene.search.TermQuery; import org.elasticsearch.Version; import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.logging.ESLoggerFactory; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -632,72 +633,22 @@ public Query nullValueQuery() { return termQuery(nullValue(), null); } + @Override public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { - if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } - PhraseQuery.Builder builder = new PhraseQuery.Builder(); - builder.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; - - stream.reset(); - while (stream.incrementToken()) { - if (enablePosIncrements) { - position += posIncrAtt.getPositionIncrement(); - } - else { - position += 1; - } - builder.add(new Term(field, termAtt.getBytesRef()), position); - } - - return builder.build(); + return super.phraseQuery(field, stream, slop, enablePosIncrements); } @Override public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolean enablePositionIncrements) throws IOException { - if (indexPhrases && slop == 0 && hasGaps(cache(stream)) == false) { stream = new FixedShingleFilter(stream, 2); field = field + FAST_PHRASE_SUFFIX; } - - MultiPhraseQuery.Builder mpqb = new MultiPhraseQuery.Builder(); - mpqb.setSlop(slop); - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - - PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); - int position = -1; - - List multiTerms = new ArrayList<>(); - stream.reset(); - while (stream.incrementToken()) { - int positionIncrement = posIncrAtt.getPositionIncrement(); - - if (positionIncrement > 0 && multiTerms.size() > 0) { - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - multiTerms.clear(); - } - position += positionIncrement; - multiTerms.add(new Term(field, termAtt.getBytesRef())); - } - - if (enablePositionIncrements) { - mpqb.add(multiTerms.toArray(new Term[0]), position); - } else { - mpqb.add(multiTerms.toArray(new Term[0])); - } - return mpqb.build(); + return super.multiPhraseQuery(field, stream, slop, enablePositionIncrements); } private static CachingTokenFilter cache(TokenStream in) { diff --git a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java index f6449771c13ec..6ba69b463b28e 100644 --- a/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/server/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -20,8 +20,8 @@ package org.elasticsearch.index.search; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; @@ -29,7 +29,6 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; -import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; @@ -49,6 +48,8 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.lucene.all.AllTermQuery; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.common.lucene.search.Queries; @@ -66,6 +67,8 @@ public class MatchQuery { + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(MappedFieldType.class)); + public enum Type implements Writeable { /** * The text is analyzed and terms are added to a boolean query. @@ -266,7 +269,7 @@ public Query parse(Type type, String fieldName, Object value) throws IOException */ boolean noForcedAnalyzer = this.analyzer == null; if (fieldType.tokenized() == false && noForcedAnalyzer && - fieldType instanceof KeywordFieldMapper.KeywordFieldType == false) { + fieldType instanceof KeywordFieldMapper.KeywordFieldType == false) { return blendTermQuery(new Term(fieldName, value.toString()), fieldType); } @@ -354,38 +357,53 @@ protected Query newSynonymQuery(Term[] terms) { @Override protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { - IllegalStateException e = checkForPositions(field); - if (e != null) { + try { + checkForPositions(field); + Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements); + if (query instanceof PhraseQuery) { + // synonyms that expand to multiple terms can return a phrase query. + return blendPhraseQuery((PhraseQuery) query, mapper); + } + return query; + } + catch (IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); } throw e; } - Query query = mapper.phraseQuery(field, stream, slop, enablePositionIncrements); - if (query instanceof PhraseQuery) { - // synonyms that expand to multiple terms can return a phrase query. - return blendPhraseQuery((PhraseQuery) query, mapper); + catch (IllegalArgumentException e) { + if (lenient == false) { + DEPRECATION_LOGGER.deprecated(e.getMessage()); + } + return newLenientFieldQuery(field, e); } - return query; } @Override protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { - IllegalStateException e = checkForPositions(field); - if (e != null) { + try { + checkForPositions(field); + return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements); + } + catch (IllegalStateException e) { if (lenient) { return newLenientFieldQuery(field, e); } throw e; } - return mapper.multiPhraseQuery(field, stream, slop, enablePositionIncrements); + catch (IllegalArgumentException e) { + if (lenient == false) { + DEPRECATION_LOGGER.deprecated(e.getMessage()); + } + return newLenientFieldQuery(field, e); + } } - private IllegalStateException checkForPositions(String field) { + private void checkForPositions(String field) { if (hasPositions(mapper) == false) { - return new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); + throw new IllegalStateException("field:[" + field + "] was indexed without position data; cannot run PhraseQuery"); } - return null; } /** @@ -467,9 +485,9 @@ private Query toSpanQueryPrefix(SpanQuery query, float boost) { } else if (query instanceof SpanNearQuery) { SpanNearQuery spanNearQuery = (SpanNearQuery) query; SpanQuery[] clauses = spanNearQuery.getClauses(); - if (clauses[clauses.length-1] instanceof SpanTermQuery) { - clauses[clauses.length-1] = new SpanMultiTermQueryWrapper<>( - new PrefixQuery(((SpanTermQuery) clauses[clauses.length-1]).getTerm()) + if (clauses[clauses.length - 1] instanceof SpanTermQuery) { + clauses[clauses.length - 1] = new SpanMultiTermQueryWrapper<>( + new PrefixQuery(((SpanTermQuery) clauses[clauses.length - 1]).getTerm()) ); } SpanNearQuery newQuery = new SpanNearQuery(clauses, spanNearQuery.getSlop(), spanNearQuery.isInOrder()); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java index 00701adc449c3..f2f62411e5f30 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhrasePrefixQueryBuilderTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import com.carrotsearch.randomizedtesting.annotations.Seed; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexOrDocValuesQuery; import org.apache.lucene.search.MatchNoDocsQuery; @@ -61,7 +62,7 @@ protected MatchPhrasePrefixQueryBuilder doCreateTestQueryBuilder() { MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder(fieldName, value); - if (randomBoolean()) { + if (randomBoolean() && fieldName.equals(STRING_FIELD_NAME)) { matchQuery.analyzer(randomFrom("simple", "keyword", "whitespace")); } @@ -99,15 +100,6 @@ protected void doAssertLuceneQuery(MatchPhrasePrefixQueryBuilder queryBuilder, Q .or(instanceOf(IndexOrDocValuesQuery.class)).or(instanceOf(MatchNoDocsQuery.class))); } - /** - * Overridden to allow for annotating with @AwaitsFix. Please remove this method after fixing. - */ - @Override - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/31061") - public void testToQuery() throws IOException { - super.testToQuery(); - } - public void testIllegalValues() { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new MatchPhrasePrefixQueryBuilder(null, "value")); assertEquals("[match_phrase_prefix] requires fieldName", e.getMessage()); @@ -119,6 +111,14 @@ public void testIllegalValues() { e = expectThrows(IllegalArgumentException.class, () -> matchQuery.maxExpansions(-1)); } + public void testPhraseOnFieldWithNoTerms() throws IOException { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder(DATE_FIELD_NAME, "three term phrase"); + matchQuery.analyzer("whitespace"); + matchQuery.doToQuery(createShardContext()); + assertWarnings("Attempted to build a phrase query with multiple terms against non-text field [" + DATE_FIELD_NAME + "]"); + } + public void testBadAnalyzer() throws IOException { MatchPhrasePrefixQueryBuilder matchQuery = new MatchPhrasePrefixQueryBuilder("fieldName", "text"); matchQuery.analyzer("bogusAnalyzer"); diff --git a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java index 91a775dbf0256..7359b58d324df 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MatchPhraseQueryBuilderTests.java @@ -64,7 +64,7 @@ protected MatchPhraseQueryBuilder doCreateTestQueryBuilder() { MatchPhraseQueryBuilder matchQuery = new MatchPhraseQueryBuilder(fieldName, value); - if (randomBoolean()) { + if (randomBoolean() && fieldName.equals(STRING_FIELD_NAME)) { matchQuery.analyzer(randomFrom("simple", "keyword", "whitespace")); } @@ -107,15 +107,6 @@ protected void doAssertLuceneQuery(MatchPhraseQueryBuilder queryBuilder, Query q .or(instanceOf(IndexOrDocValuesQuery.class)).or(instanceOf(MatchNoDocsQuery.class))); } - /** - * Overridden to allow for annotating with @AwaitsFix. Please remove this method after fixing. - */ - @Override - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/31061") - public void testToQuery() throws IOException { - super.testToQuery(); - } - public void testIllegalValues() { IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new MatchPhraseQueryBuilder(null, "value")); assertEquals("[match_phrase] requires fieldName", e.getMessage()); @@ -131,6 +122,14 @@ public void testBadAnalyzer() throws IOException { assertThat(e.getMessage(), containsString("analyzer [bogusAnalyzer] not found")); } + public void testPhraseOnFieldWithNoTerms() throws IOException { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + MatchPhraseQueryBuilder matchQuery = new MatchPhraseQueryBuilder(DATE_FIELD_NAME, "three term phrase"); + matchQuery.analyzer("whitespace"); + matchQuery.doToQuery(createShardContext()); + assertWarnings("Attempted to build a phrase query with multiple terms against non-text field [" + DATE_FIELD_NAME + "]"); + } + public void testFromSimpleJson() throws IOException { String json1 = "{\n" + " \"match_phrase\" : {\n" + diff --git a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java index 546dc42585375..6d390ea8aeba8 100644 --- a/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/MultiMatchQueryBuilderTests.java @@ -60,7 +60,6 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; -@LuceneTestCase.AwaitsFix(bugUrl="https://github.com/elastic/elasticsearch/issues/31093") public class MultiMatchQueryBuilderTests extends AbstractQueryTestCase { private static final String MISSING_WILDCARD_FIELD_NAME = "missing_*";