From bd1fcf5b9a1f21e848ff80a91fb1383c36ee4344 Mon Sep 17 00:00:00 2001 From: Adrien Grand Date: Wed, 7 Apr 2021 17:52:13 +0200 Subject: [PATCH] Make intervals queries fully pluggable through field mappers. `MappedFieldType` only allows configuring `match` and `prefix` queries today. This change makes it possible to configure how to create `wildcard` and `fuzzy` queries as well. This will allow making the upcoming `match_only_text` field fully support intervals queries. --- .../AnnotatedTextFieldTypeTests.java | 9 +- .../index/mapper/MappedFieldType.java | 31 ++++++- .../index/mapper/TextFieldMapper.java | 47 +++++++--- .../index/query/IntervalBuilder.java | 11 ++- .../index/query/IntervalsSourceProvider.java | 87 +++++++++---------- .../index/query/IntervalBuilderTests.java | 10 ++- .../query/IntervalQueryBuilderTests.java | 10 +-- 7 files changed, 126 insertions(+), 79 deletions(-) diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java index 7de8a93ac38bc..979cca76c8059 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldTypeTests.java @@ -8,11 +8,9 @@ package org.elasticsearch.index.mapper.annotatedtext; -import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; -import org.elasticsearch.index.analysis.AnalyzerScope; -import org.elasticsearch.index.analysis.NamedAnalyzer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.mapper.ContentPath; import org.elasticsearch.index.mapper.FieldTypeTestCase; import 
org.elasticsearch.index.mapper.MappedFieldType; @@ -25,9 +23,8 @@ public class AnnotatedTextFieldTypeTests extends FieldTypeTestCase { public void testIntervals() throws IOException { MappedFieldType ft = new AnnotatedTextFieldMapper.AnnotatedTextFieldType("field", Collections.emptyMap()); - NamedAnalyzer a = new NamedAnalyzer("name", AnalyzerScope.INDEX, new StandardAnalyzer()); - IntervalsSource source = ft.intervals("Donald Trump", 0, true, a, false); - assertEquals(Intervals.phrase(Intervals.term("donald"), Intervals.term("trump")), source); + IntervalsSource source = ft.termIntervals(new BytesRef("donald"), null); + assertEquals(Intervals.term("donald"), source); } public void testFetchSourceValue() throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index da3c185d3107d..2970316ecfa4e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -32,7 +32,6 @@ import org.elasticsearch.common.geo.ShapeRelation; import org.elasticsearch.common.time.DateMathParser; import org.elasticsearch.common.unit.Fuzziness; -import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.query.DistanceFeatureQueryBuilder; import org.elasticsearch.index.query.QueryRewriteContext; @@ -279,10 +278,34 @@ public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionCo } /** - * Create an {@link IntervalsSource} to be used for proximity queries + * Create an {@link IntervalsSource} for the given term. 
*/ - public IntervalsSource intervals(String query, int max_gaps, boolean ordered, - NamedAnalyzer analyzer, boolean prefix) throws IOException { + public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) { + throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + + /** + * Create an {@link IntervalsSource} for the given prefix. + */ + public IntervalsSource prefixIntervals(BytesRef prefix, SearchExecutionContext context) { + throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + + /** + * Create a fuzzy {@link IntervalsSource} for the given term. + */ + public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLength, + boolean transpositions, SearchExecutionContext context) { + throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + + /** + * Create a wildcard {@link IntervalsSource} for the given pattern. 
+ */ + public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) { throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 746a91a0fdf35..97112625ff530 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -28,6 +28,7 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; @@ -58,7 +59,6 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; import org.elasticsearch.index.mapper.Mapper.TypeParser.ParserContext; -import org.elasticsearch.index.query.IntervalBuilder; import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.index.similarity.SimilarityProvider; import org.elasticsearch.search.aggregations.support.CoreValuesSourceType; @@ -676,23 +676,44 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew } @Override - public IntervalsSource intervals(String text, int maxGaps, boolean ordered, - NamedAnalyzer analyzer, boolean prefix) throws IOException { + public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) { if (getTextSearchInfo().hasPositions() == false) { throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); } - if (analyzer == null) { - analyzer = 
getTextSearchInfo().getSearchAnalyzer(); + return Intervals.term(term); + } + + @Override + public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) { + if (getTextSearchInfo().hasPositions() == false) { + throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); } - if (prefix) { - BytesRef normalizedTerm = analyzer.normalize(name(), text); - if (prefixFieldType != null) { - return prefixFieldType.intervals(normalizedTerm); - } - return Intervals.prefix(normalizedTerm); + if (prefixFieldType != null) { + return prefixFieldType.intervals(term); + } + return Intervals.prefix(term); + } + + @Override + public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLength, + boolean transpositions, SearchExecutionContext context) { + if (getTextSearchInfo().hasPositions() == false) { + throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); + } + FuzzyQuery fq = new FuzzyQuery(new Term(name(), term), + maxDistance, prefixLength, 128, transpositions); + return Intervals.multiterm(fq.getAutomata(), term); + } + + @Override + public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) { + if (getTextSearchInfo().hasPositions() == false) { + throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed"); } + // Unlike prefixIntervals, never delegate to prefixFieldType here: that sub-field is + // built to answer prefix lookups, and handing it a wildcard pattern would treat the + // pattern as a prefix. Always expand the wildcard against this field instead. - IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? 
getTextSearchInfo().getSearchAnalyzer() : analyzer); - return builder.analyzeText(text, maxGaps, ordered); + return Intervals.wildcard(pattern); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 0f0bf4ffe39dd..772192a1f908d 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -34,7 +34,7 @@ /** * Constructs an IntervalsSource based on analyzed text */ -public class IntervalBuilder { +public abstract class IntervalBuilder { private final String field; private final Analyzer analyzer; @@ -44,6 +44,9 @@ public IntervalBuilder(String field, Analyzer analyzer) { this.analyzer = analyzer; } + /** Create term intervals for the provided term. */ + protected abstract IntervalsSource termIntervals(BytesRef term); + public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) throws IOException { try (TokenStream ts = analyzer.tokenStream(field, query); CachingTokenFilter stream = new CachingTokenFilter(ts)) { @@ -109,7 +112,7 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException { TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); ts.reset(); ts.incrementToken(); - return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); + return termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); } protected static IntervalsSource combineSources(List sources, int maxGaps, boolean ordered) { @@ -138,7 +141,7 @@ protected List analyzeTerms(TokenStream ts) throws IOException while (ts.incrementToken()) { BytesRef term = bytesAtt.getBytesRef(); int precedingSpaces = posAtt.getPositionIncrement() - 1; - terms.add(extend(Intervals.term(BytesRef.deepCopyOf(term)), precedingSpaces)); + terms.add(extend(termIntervals(BytesRef.deepCopyOf(term)), precedingSpaces)); } ts.end(); 
return terms; @@ -170,7 +173,7 @@ else if (synonyms.size() > 1) { synonyms.clear(); spaces = posInc - 1; } - synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); + synonyms.add(termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); } if (synonyms.size() == 1) { terms.add(extend(synonyms.get(0), spaces)); diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 858fbab45221c..e81699a057a8a 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -8,12 +8,10 @@ package org.elasticsearch.index.query; -import org.apache.lucene.index.Term; import org.apache.lucene.queries.intervals.FilteredIntervalsSource; import org.apache.lucene.queries.intervals.IntervalIterator; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; -import org.apache.lucene.search.FuzzyQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.Version; import org.elasticsearch.common.ParseField; @@ -128,23 +126,36 @@ public Match(StreamInput in) throws IOException { } } + private IntervalsSource intervals(MappedFieldType fieldType, String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer, + SearchExecutionContext context) throws IOException { + IntervalBuilder builder = new IntervalBuilder(fieldType.name(), analyzer) { + @Override + protected IntervalsSource termIntervals(BytesRef term) { + return fieldType.termIntervals(term, context); + } + }; + return builder.analyzeText(text, maxGaps, ordered); + } + @Override public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) throws IOException { NamedAnalyzer analyzer = null; if (this.analyzer != null) { analyzer = context.getIndexAnalyzers().get(this.analyzer); } - 
IntervalsSource source; if (useField != null) { fieldType = context.getFieldType(useField); assert fieldType != null; - source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false)); } - else { - source = fieldType.intervals(query, maxGaps, ordered, analyzer, false); + if (analyzer == null) { + analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + } + IntervalsSource source = intervals(fieldType, query, maxGaps, ordered, analyzer, context); + if (useField != null) { + source = Intervals.fixField(useField, source); } if (filter != null) { - return filter.filter(source, context, fieldType); + source = filter.filter(source, context, fieldType); } return source; } @@ -517,14 +528,17 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType if (this.analyzer != null) { analyzer = context.getIndexAnalyzers().get(this.analyzer); } - IntervalsSource source; if (useField != null) { fieldType = context.getFieldType(useField); assert fieldType != null; - source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true)); } - else { - source = fieldType.intervals(prefix, 0, false, analyzer, true); + if (analyzer == null) { + analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + } + final BytesRef prefixTerm = analyzer.normalize(fieldType.name(), prefix); + IntervalsSource source = fieldType.prefixIntervals(prefixTerm, context); + if (useField != null) { + source = Intervals.fixField(useField, source); } return source; } @@ -628,33 +642,23 @@ public Wildcard(StreamInput in) throws IOException { @Override public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) { - NamedAnalyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + NamedAnalyzer analyzer = null; if (this.analyzer != null) { analyzer = context.getIndexAnalyzers().get(this.analyzer); } - IntervalsSource source; if (useField != null) { fieldType = 
context.getFieldType(useField); assert fieldType != null; - checkPositions(fieldType); - if (this.analyzer == null) { - analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); - } - BytesRef normalizedTerm = analyzer.normalize(useField, pattern); - source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm)); } - else { - checkPositions(fieldType); - BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern); - source = Intervals.wildcard(normalizedTerm); + if (analyzer == null) { + analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); } - return source; - } - - private void checkPositions(MappedFieldType type) { - if (type.getTextSearchInfo().hasPositions() == false) { - throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed"); + BytesRef normalizedPattern = analyzer.normalize(fieldType.name(), pattern); + IntervalsSource source = fieldType.wildcardIntervals(normalizedPattern, context); + if (useField != null) { + source = Intervals.fixField(useField, source); } + return source; } @Override @@ -765,36 +769,27 @@ public Fuzzy(StreamInput in) throws IOException { @Override public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) { - NamedAnalyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + NamedAnalyzer analyzer = null; if (this.analyzer != null) { analyzer = context.getIndexAnalyzers().get(this.analyzer); } - IntervalsSource source; if (useField != null) { fieldType = context.getFieldType(useField); assert fieldType != null; - checkPositions(fieldType); - if (this.analyzer == null) { - analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); - } } - checkPositions(fieldType); - BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), term); - FuzzyQuery fq = new FuzzyQuery(new Term(fieldType.name(), normalizedTerm), - fuzziness.asDistance(term), prefixLength, 128, transpositions); - source = 
Intervals.multiterm(fq.getAutomata(), term); + if (analyzer == null) { + analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer(); + } + // Fuzzy queries only work with unicode content so it's legal to call utf8ToString here. + String normalizedTerm = analyzer.normalize(fieldType.name(), term).utf8ToString(); + IntervalsSource source = fieldType.fuzzyIntervals(normalizedTerm, fuzziness.asDistance(term), + prefixLength, transpositions, context); if (useField != null) { source = Intervals.fixField(useField, source); } return source; } - private void checkPositions(MappedFieldType type) { - if (type.getTextSearchInfo().hasPositions() == false) { - throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed"); - } - } - @Override public void extractFields(Set fields) { if (useField != null) { diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java index 836ef569e4eab..9851d906369ea 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -14,13 +14,21 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.queries.intervals.Intervals; import org.apache.lucene.queries.intervals.IntervalsSource; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.test.ESTestCase; import java.io.IOException; public class IntervalBuilderTests extends ESTestCase { - private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer()); + private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer()) { + + @Override + protected IntervalsSource termIntervals(BytesRef term) { + return Intervals.term(term); + } + + }; public void testSimpleTerm() throws IOException { diff --git 
a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 2421542740ad1..77c999f45cd7a 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -535,28 +535,28 @@ public void testFuzzy() throws IOException { IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); Query expected = new IntervalQuery(TEXT_FIELD_NAME, - buildFuzzySource("term", "Term", FuzzyQueryBuilder.DEFAULT_PREFIX_LENGTH, true, Fuzziness.AUTO.asDistance("term"))); + buildFuzzySource("term", "term", FuzzyQueryBuilder.DEFAULT_PREFIX_LENGTH, true, Fuzziness.AUTO.asDistance("term"))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); String json_with_prefix = "{ \"intervals\" : { \"" + TEXT_FIELD_NAME + "\": { " + "\"fuzzy\" : { \"term\" : \"Term\", \"prefix_length\" : 2 } } } }"; builder = (IntervalQueryBuilder) parseQuery(json_with_prefix); expected = new IntervalQuery(TEXT_FIELD_NAME, - buildFuzzySource("term", "Term", 2, true, Fuzziness.AUTO.asDistance("term"))); + buildFuzzySource("term", "term", 2, true, Fuzziness.AUTO.asDistance("term"))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); String json_with_fuzziness = "{ \"intervals\" : { \"" + TEXT_FIELD_NAME + "\": { " + "\"fuzzy\" : { \"term\" : \"Term\", \"prefix_length\" : 2, \"fuzziness\" : \"1\" } } } }"; builder = (IntervalQueryBuilder) parseQuery(json_with_fuzziness); expected = new IntervalQuery(TEXT_FIELD_NAME, - buildFuzzySource("term", "Term", 2, true, Fuzziness.ONE.asDistance("term"))); + buildFuzzySource("term", "term", 2, true, Fuzziness.ONE.asDistance("term"))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); String json_no_transpositions = "{ \"intervals\" : { \"" + TEXT_FIELD_NAME + "\": { " + 
"\"fuzzy\" : { \"term\" : \"Term\", \"prefix_length\" : 2, \"transpositions\" : false } } } }"; builder = (IntervalQueryBuilder) parseQuery(json_no_transpositions); expected = new IntervalQuery(TEXT_FIELD_NAME, - buildFuzzySource("term", "Term", 2, false, Fuzziness.AUTO.asDistance("term"))); + buildFuzzySource("term", "term", 2, false, Fuzziness.AUTO.asDistance("term"))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); String json_with_analyzer = "{ \"intervals\" : { \"" + TEXT_FIELD_NAME + "\": { " + @@ -571,7 +571,7 @@ public void testFuzzy() throws IOException { "\"use_field\" : \"" + MASKED_FIELD + "\" } } } }"; builder = (IntervalQueryBuilder) parseQuery(json_with_fixfield); expected = new IntervalQuery(TEXT_FIELD_NAME, Intervals.fixField(MASKED_FIELD, - buildFuzzySource("term", "Term", 2, true, Fuzziness.ONE.asDistance("term")))); + buildFuzzySource("term", "term", 2, true, Fuzziness.ONE.asDistance("term")))); assertEquals(expected, builder.toQuery(createSearchExecutionContext())); }