From 6b7d175e5acc2a855464b11d7030ff024182263d Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 26 Jul 2018 14:13:34 +0100 Subject: [PATCH 01/29] Add IntervalQueryBuilder with support for match and combine intervals --- .../index/mapper/MappedFieldType.java | 4 + .../index/mapper/TextFieldMapper.java | 5 + .../index/query/IntervalQueryBuilder.java | 87 +++++ .../index/query/IntervalsSourceProvider.java | 351 ++++++++++++++++++ .../elasticsearch/plugins/SearchPlugin.java | 24 +- .../elasticsearch/search/SearchModule.java | 19 + .../query/IntervalQueryBuilderTests.java | 107 ++++++ 7 files changed, 596 insertions(+), 1 deletion(-) create mode 100644 server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java create mode 100644 server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java create mode 100644 server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 5f3f4a4de49d6..0e433ef67e5e0 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -362,6 +362,10 @@ public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolea throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } + public TokenStream tokenize(String field, String text) { + throw new IllegalArgumentException("Can only tokenize text on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); + } + /** * An enum used to describe the relation between the range of terms in a * shard when compared with a query range diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java 
b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 29f1cbb721feb..64611db29b7e2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -582,6 +582,11 @@ public Query existsQuery(QueryShardContext context) { } } + @Override + public TokenStream tokenize(String field, String text) { + return searchAnalyzer().tokenStream(field, text); + } + @Override public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java new file mode 100644 index 0000000000000..1b0ac3363f558 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -0,0 +1,87 @@ +package org.elasticsearch.index.query; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.intervals.IntervalQuery; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.Objects; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; + +public class IntervalQueryBuilder extends AbstractQueryBuilder { + + public static final String NAME = "intervals"; + + private final String field; + private final IntervalsSourceProvider sourceProvider; + + public IntervalQueryBuilder(String field, IntervalsSourceProvider sourceProvider) { + this.field = field; + this.sourceProvider = sourceProvider; + 
} + + public IntervalQueryBuilder(StreamInput in) throws IOException { + super(in); + this.field = in.readString(); + this.sourceProvider = in.readNamedWriteable(IntervalsSourceProvider.class); + } + + @Override + protected void doWriteTo(StreamOutput out) throws IOException { + out.writeString(field); + out.writeNamedWriteable(sourceProvider); + } + + @Override + protected void doXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(NAME); + builder.field("field", field); + builder.field("source", sourceProvider); + printBoostAndQueryName(builder); + builder.endObject(); + } + + private static final ConstructingObjectParser PARSER + = new ConstructingObjectParser<>(NAME, args -> new IntervalQueryBuilder((String) args[0], (IntervalsSourceProvider) args[1])); + static { + PARSER.declareString(constructorArg(), new ParseField("field")); + PARSER.declareObject(constructorArg(), (parser, c) -> IntervalsSourceProvider.fromXContent(parser), new ParseField("source")); + PARSER.declareFloat(IntervalQueryBuilder::boost, new ParseField("boost")); + PARSER.declareString(IntervalQueryBuilder::queryName, new ParseField("_name")); + } + + public static IntervalQueryBuilder fromXContent(XContentParser parser) throws IOException { + return PARSER.apply(parser, null); + } + + @Override + protected Query doToQuery(QueryShardContext context) throws IOException { + MappedFieldType fieldType = context.fieldMapper(field); + if (fieldType == null) { + throw new IllegalArgumentException("Cannot create IntervalQuery over non-existent field [" + field + "]"); + } + return new IntervalQuery(field, sourceProvider.getSource(fieldType)); + } + + @Override + protected boolean doEquals(IntervalQueryBuilder other) { + return Objects.equals(field, other.field) && Objects.equals(sourceProvider, other.sourceProvider); + } + + @Override + protected int doHashCode() { + return Objects.hash(field, sourceProvider); + } + + @Override + public String getWriteableName() 
{ + return NAME; + } +} diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java new file mode 100644 index 0000000000000..3f25e0e5a61b4 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -0,0 +1,351 @@ +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; +import org.elasticsearch.common.io.stream.NamedWriteable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ToXContentObject; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; +import java.util.Objects; +import java.util.Set; + +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; + +public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentObject { + + public abstract IntervalsSource getSource(MappedFieldType fieldType) throws IOException; + + @Override + public abstract int hashCode(); 
+ + @Override + public abstract boolean equals(Object other); + + public static IntervalsSourceProvider fromXContent(XContentParser parser) throws IOException { + if (parser.currentToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Malformed IntervalsSource definition, expected start_object"); + } + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Malformed IntervalsSource definition, no field after start_object"); + } + String sourceType = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Malformed IntervalsSource definition, expected start_object after source name"); + } + IntervalsSourceProvider provider = parser.namedObject(IntervalsSourceProvider.class, sourceType, null); + //end_object of the specific query (e.g. match, multi_match etc.) element + if (parser.currentToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), + "[" + sourceType + "] malformed source, expected [END_OBJECT] but found [" + parser.currentToken() + "]"); + } + //end_object of the query object + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), + "[" + sourceType + "] malformed source, expected [END_OBJECT] but found [" + parser.currentToken() + "]"); + } + return provider; + } + + public static final IntervalsSource NO_INTERVALS = new IntervalsSource() { + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return new IntervalIterator() { + @Override + public int start() { + return NO_MORE_INTERVALS; + } + + @Override + public int end() { + return NO_MORE_INTERVALS; + } + + @Override + public int nextInterval() throws IOException { + return NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + 
return 0; + } + + @Override + public int docID() { + return NO_MORE_DOCS; + } + + @Override + public int nextDoc() throws IOException { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 0; + } + }; + } + + @Override + public void extractTerms(String field, Set terms) { + + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public String toString() { + return "no_match"; + } + }; + + public static class Match extends IntervalsSourceProvider { + + public static final String NAME = "match"; + + private final String text; + private final int maxWidth; + private final boolean ordered; + + public Match(String text, int maxWidth, boolean ordered) { + this.text = text; + this.maxWidth = maxWidth; + this.ordered = ordered; + } + + public Match(StreamInput in) throws IOException { + this.text = in.readString(); + this.maxWidth = in.readInt(); + this.ordered = in.readBoolean(); + } + + @Override + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List subSources = new ArrayList<>(); + try (TokenStream ts = fieldType.tokenize(fieldType.name(), text)) { + // TODO synonyms -> run through GraphTokenStreamFiniteStrings? + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + BytesRef term = bytesAtt.getBytesRef(); + subSources.add(Intervals.term(BytesRef.deepCopyOf(term))); + } + ts.end(); + } + if (subSources.size() == 0) { + return NO_INTERVALS; + } + if (subSources.size() == 1) { + return subSources.get(0); + } + IntervalsSource source = ordered ? 
+ Intervals.ordered(subSources.toArray(new IntervalsSource[]{})) : + Intervals.unordered(subSources.toArray(new IntervalsSource[]{})); + if (maxWidth != Integer.MAX_VALUE) { + return Intervals.maxwidth(maxWidth, source); + } + return source; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Match match = (Match) o; + return Objects.equals(text, match.text) && Objects.equals(maxWidth, match.maxWidth) + && Objects.equals(ordered, match.ordered); + } + + @Override + public int hashCode() { + return Objects.hash(text, maxWidth, ordered); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(text); + out.writeInt(maxWidth); + out.writeBoolean(ordered); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject(NAME); + builder.field("text", text); + builder.field("max_width", maxWidth); + builder.field("ordered", ordered); + return builder.endObject().endObject(); + } + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + String text = (String) args[0]; + int max_width = (args[1] == null ? Integer.MAX_VALUE : (Integer) args[1]); + boolean ordered = (args[2] == null ? 
false : (Boolean) args[2]); + return new Match(text, max_width, ordered); + }); + static { + PARSER.declareString(constructorArg(), new ParseField("text")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_width")); + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); + } + + public static Match fromXContent(XContentParser parser) throws IOException { + return PARSER.apply(parser, null); + } + } + + public static class Combine extends IntervalsSourceProvider { + + public static final String NAME = "combine"; + + protected enum Type { + ORDERED { + @Override + IntervalsSource getSource(List subSources) { + return Intervals.ordered(subSources.toArray(new IntervalsSource[0])); + } + }, UNORDERED { + @Override + IntervalsSource getSource(List subSources) { + return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); + } + }, OR { + @Override + IntervalsSource getSource(List subSources) { + return Intervals.or(subSources.toArray(new IntervalsSource[0])); + } + }; + + abstract IntervalsSource getSource(List subSources); + } + + private final List subSources; + private final Type type; + private final int maxWidth; + + public Combine(List subSources, Type type, int maxWidth) { + this.subSources = subSources; + this.type = type; + this.maxWidth = maxWidth; + } + + public Combine(StreamInput in) throws IOException { + this.type = in.readEnum(Type.class); + this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); + this.maxWidth = in.readInt(); + } + + @Override + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List ss = new ArrayList<>(); + for (IntervalsSourceProvider provider : subSources) { + ss.add(provider.getSource(fieldType)); + } + IntervalsSource source = type.getSource(ss); + if (maxWidth == Integer.MAX_VALUE) { + return source; + } + return Intervals.maxwidth(maxWidth, source); + } + + @Override + public boolean equals(Object o) { + if (this == o) return 
true; + if (o == null || getClass() != o.getClass()) return false; + Combine combine = (Combine) o; + return Objects.equals(subSources, combine.subSources) && + type == combine.type && maxWidth == combine.maxWidth; + } + + @Override + public int hashCode() { + return Objects.hash(subSources, type, maxWidth); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeEnum(type); + out.writeNamedWriteableList(subSources); + out.writeInt(maxWidth); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject(NAME); + builder.field("type", type.toString().toLowerCase(Locale.ROOT)); + builder.field("max_width", maxWidth); + builder.startArray("sources"); + for (IntervalsSourceProvider provider : subSources) { + provider.toXContent(builder, params); + } + builder.endArray(); + builder.endObject(); + return builder.endObject(); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + Type type = Type.valueOf(((String)args[0]).toUpperCase(Locale.ROOT)); + List subSources = (List)args[1]; + Integer maxWidth = (args[2] == null ? 
Integer.MAX_VALUE : (Integer)args[2]); + return new Combine(subSources, type, maxWidth); + }); + static { + PARSER.declareString(constructorArg(), new ParseField("type")); + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("sources")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_width")); + } + + public static Combine fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + } + +} diff --git a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java index 952aa76fd17a9..10c6f565d2402 100644 --- a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java @@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.search.function.ScoreFunction; import org.elasticsearch.common.xcontent.XContent; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.query.IntervalsSourceProvider; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParser; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder; @@ -126,11 +127,17 @@ default List getPipelineAggregations() { return emptyList(); } /** - * The next {@link Rescorer}s added by this plugin. + * The new {@link Rescorer}s added by this plugin. */ default List> getRescorers() { return emptyList(); } + /** + * The new {@link IntervalsSourceProvider}s added by this plugin + */ + default List> getIntervalsSourceProviders() { + return emptyList(); + } /** * Specification of custom {@link ScoreFunction}. 
@@ -208,6 +215,21 @@ public QuerySpec(String name, Writeable.Reader reader, QueryParser parser) super(name, reader, parser); } } + + /** + * Specification of custom {@link IntervalsSourceProvider} + */ + class IntervalSpec extends SearchExtensionSpec> { + + /** + * Specification of custom {@link IntervalsSourceProvider} + */ + public IntervalSpec(String name, Writeable.Reader reader, CheckedFunction parser) { + super(name, reader, parser); + } + + } + /** * Specification for an {@link Aggregation}. */ diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index efef1aeb04f76..ef9d1bafc7b5b 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -44,6 +44,8 @@ import org.elasticsearch.index.query.GeoPolygonQueryBuilder; import org.elasticsearch.index.query.GeoShapeQueryBuilder; import org.elasticsearch.index.query.IdsQueryBuilder; +import org.elasticsearch.index.query.IntervalQueryBuilder; +import org.elasticsearch.index.query.IntervalsSourceProvider; import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.index.query.MatchNoneQueryBuilder; import org.elasticsearch.index.query.MatchPhrasePrefixQueryBuilder; @@ -291,6 +293,7 @@ public SearchModule(Settings settings, boolean transportClient, List plugins) { registerQuery(new QuerySpec<>(ExistsQueryBuilder.NAME, ExistsQueryBuilder::new, ExistsQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(MatchNoneQueryBuilder.NAME, MatchNoneQueryBuilder::new, MatchNoneQueryBuilder::fromXContent)); registerQuery(new QuerySpec<>(TermsSetQueryBuilder.NAME, TermsSetQueryBuilder::new, TermsSetQueryBuilder::fromXContent)); + registerQuery(new QuerySpec<>(IntervalQueryBuilder.NAME, IntervalQueryBuilder::new, IntervalQueryBuilder::fromXContent)); if (ShapesAvailability.JTS_AVAILABLE && 
ShapesAvailability.SPATIAL4J_AVAILABLE) { registerQuery(new QuerySpec<>(GeoShapeQueryBuilder.NAME, GeoShapeQueryBuilder::new, GeoShapeQueryBuilder::fromXContent)); @@ -781,12 +785,27 @@ private void registerQueryParsers(List plugins) { registerFromPlugin(plugins, SearchPlugin::getQueries, this::registerQuery); } + private void registerIntervalsSourceProviders(List plugins) { + registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Match.NAME, + IntervalsSourceProvider.Match::new, IntervalsSourceProvider.Match::fromXContent)); + registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Combine.NAME, + IntervalsSourceProvider.Combine::new, IntervalsSourceProvider.Combine::fromXContent)); + registerFromPlugin(plugins, SearchPlugin::getIntervalsSourceProviders, this::registerIntervalsSourceProvider); + } + private void registerQuery(QuerySpec spec) { namedWriteables.add(new NamedWriteableRegistry.Entry(QueryBuilder.class, spec.getName().getPreferredName(), spec.getReader())); namedXContents.add(new NamedXContentRegistry.Entry(QueryBuilder.class, spec.getName(), (p, c) -> spec.getParser().fromXContent(p))); } + private void registerIntervalsSourceProvider(SearchPlugin.IntervalSpec spec) { + namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + spec.getName().getPreferredName(), spec.getReader())); + namedXContents.add(new NamedXContentRegistry.Entry(IntervalsSourceProvider.class, spec.getName(), + (p, c) -> spec.getParser().apply(p))); + } + public FetchPhase getFetchPhase() { return new FetchPhase(fetchSubPhases); } diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java new file mode 100644 index 0000000000000..0c33c509f615c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -0,0 +1,107 @@ +package 
org.elasticsearch.index.query; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.intervals.IntervalQuery; +import org.apache.lucene.search.intervals.Intervals; +import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.test.AbstractQueryTestCase; + +import java.io.IOException; +import java.util.Arrays; + +import static org.hamcrest.Matchers.instanceOf; + +public class IntervalQueryBuilderTests extends AbstractQueryTestCase { + + @Override + protected IntervalQueryBuilder doCreateTestQueryBuilder() { + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + IntervalsSourceProvider match1 = new IntervalsSourceProvider.Match("jabber crackpot henceforth", Integer.MAX_VALUE, true); + IntervalsSourceProvider match2 = new IntervalsSourceProvider.Match("floo", Integer.MAX_VALUE, true); + IntervalsSourceProvider combi + = new IntervalsSourceProvider.Combine(Arrays.asList(match1, match2), IntervalsSourceProvider.Combine.Type.ORDERED, 30); + return new IntervalQueryBuilder(STRING_FIELD_NAME, combi); + } + + @Override + protected void doAssertLuceneQuery(IntervalQueryBuilder queryBuilder, Query query, SearchContext context) throws IOException { + assertThat(query, instanceOf(IntervalQuery.class)); + } + + public void testMatchInterval() throws IOException { + + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + + String json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + + " \"source\" : { \"match\" : { " + + " \"text\" : \"Hello world\" } } } }"; + + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + IntervalQuery expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))); + + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + + " 
\"source\" : { \"match\" : { " + + " \"text\" : \"Hello world\"," + + " \"max_width\" : 40 } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.maxwidth(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + + " \"source\" : { \"match\" : { " + + " \"text\" : \"Hello world\"," + + " \"ordered\" : \"true\" } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + } + + public void testCombineInterval() throws IOException { + + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + + String json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"combine\" : {" + + " \"type\" : \"or\"," + + " \"sources\" : [" + + " { \"match\" : { \"text\" : \"one\" } }," + + " { \"match\" : { \"text\" : \"two\" } } ]," + + " \"max_width\" : 30 } } } }"; + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.maxwidth(30, Intervals.or(Intervals.term("one"), Intervals.term("two")))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"combine\" : {" + + " \"type\" : \"ordered\"," + + " \"sources\" : [" + + " { \"match\" : { \"text\" : \"one\" } }," + + " { \"combine\" : { " + + " \"type\" : \"unordered\"," + + " \"sources\" : [" + + " { \"match\" : { \"text\" : \"two\" } }," + + " { \"match\" : { \"text\" : \"three\" } } ] } } ]," + + " 
\"max_width\" : 30 } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.maxwidth(30, Intervals.ordered( + Intervals.term("one"), + Intervals.unordered(Intervals.term("two"), Intervals.term("three"))))); + assertEquals(expected, builder.toQuery(createShardContext())); + + } +} From 7d9b9ef9cbbdcf76c34be5733e91b1e45250b995 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 26 Jul 2018 16:31:02 +0100 Subject: [PATCH 02/29] Add relative intervals --- .../index/query/IntervalsSourceProvider.java | 112 ++++++++++++++++++ .../elasticsearch/search/SearchModule.java | 2 + .../query/IntervalQueryBuilderTests.java | 66 +++++++++++ 3 files changed, 180 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 3f25e0e5a61b4..7992b6b27999f 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -348,4 +348,116 @@ public static Combine fromXContent(XContentParser parser) { } } + public static class Relate extends IntervalsSourceProvider { + + public static final String NAME = "relate"; + + public enum Relation { + CONTAINING { + @Override + IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + return Intervals.containing(source, filter); + } + }, NOT_CONTAINING { + @Override + IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + return Intervals.notContaining(source, filter); + } + }, CONTAINED_BY { + @Override + IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + return Intervals.containedBy(source, filter); + } + }, NOT_CONTAINED_BY { + @Override + IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + return Intervals.notContainedBy(source, 
filter); + } + }, NOT_OVERLAPPING { + @Override + IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + return Intervals.nonOverlapping(source, filter); + } + }; + abstract IntervalsSource getSource(IntervalsSource source, IntervalsSource filter); + } + + private final IntervalsSourceProvider source; + private final IntervalsSourceProvider filter; + private final Relation relation; + + public Relate(IntervalsSourceProvider source, IntervalsSourceProvider filter, Relation relation) { + this.source = source; + this.filter = filter; + this.relation = relation; + } + + public Relate(StreamInput in) throws IOException { + this.source = in.readNamedWriteable(IntervalsSourceProvider.class); + this.filter = in.readNamedWriteable(IntervalsSourceProvider.class); + this.relation = in.readEnum(Relation.class); + } + + @Override + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + IntervalsSource s = source.getSource(fieldType); + IntervalsSource f = filter.getSource(fieldType); + return relation.getSource(s, f); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Relate relate = (Relate) o; + return Objects.equals(source, relate.source) && + Objects.equals(filter, relate.filter) && + relation == relate.relation; + } + + @Override + public int hashCode() { + return Objects.hash(source, filter, relation); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeNamedWriteable(source); + out.writeNamedWriteable(filter); + out.writeEnum(relation); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject(NAME); + builder.field("source", source); + builder.field("filter", filter); + builder.field("relation", 
relation.toString().toLowerCase(Locale.ROOT)); + builder.endObject(); + builder.endObject(); + return builder; + } + + static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + Relation relation = Relation.valueOf(((String)args[2]).toUpperCase(Locale.ROOT)); + return new Relate((IntervalsSourceProvider)args[0], (IntervalsSourceProvider)args[1], relation); + }); + static { + PARSER.declareObject(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("source")); + PARSER.declareObject(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("filter")); + PARSER.declareString(constructorArg(), new ParseField("relation")); + } + + public static Relate fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); + } + } + } diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index ef9d1bafc7b5b..9db5bd1311f98 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -790,6 +790,8 @@ private void registerIntervalsSourceProviders(List plugins) { IntervalsSourceProvider.Match::new, IntervalsSourceProvider.Match::fromXContent)); registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new, IntervalsSourceProvider.Combine::fromXContent)); + registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Relate.NAME, + IntervalsSourceProvider.Relate::new, IntervalsSourceProvider.Relate::fromXContent)); registerFromPlugin(plugins, SearchPlugin::getIntervalsSourceProviders, this::registerIntervalsSourceProvider); } diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java 
index 0c33c509f615c..8a50394bd3153 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -9,6 +9,7 @@ import java.io.IOException; import java.util.Arrays; +import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.instanceOf; public class IntervalQueryBuilderTests extends AbstractQueryTestCase { @@ -104,4 +105,69 @@ public void testCombineInterval() throws IOException { assertEquals(expected, builder.toQuery(createShardContext())); } + + public void testRelateIntervals() throws IOException { + + assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); + + String json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"relate\" : {" + + " \"relation\" : \"containing\"," + + " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + + " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.containing(Intervals.term("one"), Intervals.term("two"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"relate\" : {" + + " \"relation\" : \"contained_by\"," + + " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + + " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.containedBy(Intervals.term("one"), Intervals.term("two"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"relate\" : {" + + " 
\"relation\" : \"not_containing\"," + + " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + + " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.notContaining(Intervals.term("one"), Intervals.term("two"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"relate\" : {" + + " \"relation\" : \"not_contained_by\"," + + " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + + " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.notContainedBy(Intervals.term("one"), Intervals.term("two"))); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"relate\" : {" + + " \"relation\" : \"not_overlapping\"," + + " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + + " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.nonOverlapping(Intervals.term("one"), Intervals.term("two"))); + assertEquals(expected, builder.toQuery(createShardContext())); + } } From b0439c3547dce8202361d5c27cd6c608b4084f43 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 31 Aug 2018 13:00:36 +0100 Subject: [PATCH 03/29] feedback --- .../index/mapper/MappedFieldType.java | 4 -- .../index/mapper/TextFieldMapper.java | 5 -- .../index/query/IntervalQueryBuilder.java | 5 ++ .../index/query/IntervalsSourceProvider.java | 8 ++- .../query/IntervalQueryBuilderTests.java | 68 +++++++++++++++---- 5 files changed, 66 insertions(+), 24 deletions(-) diff --git 
a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 5216976903cf2..4a3fa852e7f7d 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -369,10 +369,6 @@ public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolea throw new IllegalArgumentException("Can only use phrase queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } - public TokenStream tokenize(String field, String text) { - throw new IllegalArgumentException("Can only tokenize text on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); - } - /** * An enum used to describe the relation between the range of terms in a * shard when compared with a query range diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 64611db29b7e2..29f1cbb721feb 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -582,11 +582,6 @@ public Query existsQuery(QueryShardContext context) { } } - @Override - public TokenStream tokenize(String field, String text) { - return searchAnalyzer().tokenStream(field, text); - } - @Override public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index 1b0ac3363f558..140c2e63d900d 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ 
-1,5 +1,6 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.elasticsearch.common.ParseField; @@ -67,6 +68,10 @@ protected Query doToQuery(QueryShardContext context) throws IOException { if (fieldType == null) { throw new IllegalArgumentException("Cannot create IntervalQuery over non-existent field [" + field + "]"); } + if (fieldType.tokenized() == false || + fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions"); + } return new IntervalQuery(field, sourceProvider.getSource(fieldType)); } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 7992b6b27999f..3308ad7efbfa9 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -1,7 +1,9 @@ package org.elasticsearch.index.query; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.intervals.IntervalIterator; @@ -155,7 +157,11 @@ public Match(StreamInput in) throws IOException { @Override public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { List subSources = new ArrayList<>(); - try (TokenStream ts = fieldType.tokenize(fieldType.name(), text)) { + if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + throw new IllegalArgumentException("Cannot 
create source against field [" + fieldType.name() + "] with no positions indexed"); + } + Analyzer analyzer = fieldType.searchAnalyzer(); + try (TokenStream ts = analyzer.tokenStream(fieldType.name(), text)) { // TODO synonyms -> run through GraphTokenStreamFiniteStrings? TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); ts.reset(); diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 8a50394bd3153..026602e9036e1 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -7,21 +7,46 @@ import org.elasticsearch.test.AbstractQueryTestCase; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; +import java.util.List; -import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; public class IntervalQueryBuilderTests extends AbstractQueryTestCase { @Override protected IntervalQueryBuilder doCreateTestQueryBuilder() { - assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); - IntervalsSourceProvider match1 = new IntervalsSourceProvider.Match("jabber crackpot henceforth", Integer.MAX_VALUE, true); - IntervalsSourceProvider match2 = new IntervalsSourceProvider.Match("floo", Integer.MAX_VALUE, true); - IntervalsSourceProvider combi - = new IntervalsSourceProvider.Combine(Arrays.asList(match1, match2), IntervalsSourceProvider.Combine.Type.ORDERED, 30); - return new IntervalQueryBuilder(STRING_FIELD_NAME, combi); + return new IntervalQueryBuilder(STRING_FIELD_NAME, createRandomSource()); + } + + private IntervalsSourceProvider createRandomSource() { + switch (randomInt(20)) { + case 0: + IntervalsSourceProvider source1 = 
createRandomSource(); + IntervalsSourceProvider source2 = createRandomSource(); + int relOrd = randomInt(IntervalsSourceProvider.Relate.Relation.values().length - 1); + return new IntervalsSourceProvider.Relate(source1, source2, IntervalsSourceProvider.Relate.Relation.values()[relOrd]); + case 1: + case 2: + case 3: + int count = randomInt(5) + 1; + List subSources = new ArrayList<>(); + for (int i = 0; i < count; i++) { + subSources.add(createRandomSource()); + } + int typeOrd = randomInt(IntervalsSourceProvider.Combine.Type.values().length - 1); + int width = randomBoolean() ? Integer.MAX_VALUE : randomIntBetween(count, 100); + return new IntervalsSourceProvider.Combine(subSources, IntervalsSourceProvider.Combine.Type.values()[typeOrd], width); + default: + int wordCount = randomInt(4) + 1; + List words = new ArrayList<>(); + for (int i = 0; i < wordCount; i++) { + words.add(randomRealisticUnicodeOfLengthBetween(4, 20)); + } + String text = String.join(" ", words); + return new IntervalsSourceProvider.Match(text, randomBoolean() ? 
Integer.MAX_VALUE : randomIntBetween(1, 20), randomBoolean()); + } } @Override @@ -31,8 +56,6 @@ protected void doAssertLuceneQuery(IntervalQueryBuilder queryBuilder, Query quer public void testMatchInterval() throws IOException { - assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); - String json = "{ \"intervals\" : " + "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + " \"source\" : { \"match\" : { " + @@ -68,8 +91,6 @@ public void testMatchInterval() throws IOException { public void testCombineInterval() throws IOException { - assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); - String json = "{ \"intervals\" : " + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + " \"source\" : { " + @@ -108,8 +129,6 @@ public void testCombineInterval() throws IOException { public void testRelateIntervals() throws IOException { - assumeTrue("test runs only when at least a type is registered", getCurrentTypes().length > 0); - String json = "{ \"intervals\" : " + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + " \"source\" : { " + @@ -170,4 +189,25 @@ public void testRelateIntervals() throws IOException { Intervals.nonOverlapping(Intervals.term("one"), Intervals.term("two"))); assertEquals(expected, builder.toQuery(createShardContext())); } + + public void testNonIndexedFields() { + IntervalsSourceProvider provider = createRandomSource(); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { + IntervalQueryBuilder builder = new IntervalQueryBuilder("no_such_field", provider); + builder.doToQuery(createShardContext()); + }); + assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over non-existent field [no_such_field]")); + + e = expectThrows(IllegalArgumentException.class, () -> { + IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider); + builder.doToQuery(createShardContext()); + }); + assertThat(e.getMessage(), 
equalTo("Cannot create IntervalQuery over field [" + INT_FIELD_NAME + "] with no indexed positions")); + + e = expectThrows(IllegalArgumentException.class, () -> { + IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider); + builder.doToQuery(createShardContext()); + }); + assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + STRING_FIELD_NAME_2 + "] with no indexed positions")); + } } From 6cb7fe8e0a4a795cf7bef0fcc49cc773befeab95 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 5 Sep 2018 08:16:18 +0100 Subject: [PATCH 04/29] YAML test - broekn --- .../test/search/220_interval_query.yml | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml new file mode 100644 index 0000000000000..78c84f68d2b3e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml @@ -0,0 +1,24 @@ +setup: + - skip: + - version: " - 6.99.99" + - reason: "Implemented in 7.0" + + - do: + indices.create: + index: test + body: + mappings: + test: + properties: + text: + type: text + - do: + index: + index: test + type: test + id: 1 + body: { text: some short words and a stupendously long one } + + - do: + indices.refresh: + index: [test] From b0d28aa3fd6a156278a84d902655381c75aebae4 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 1 Oct 2018 08:24:40 +0100 Subject: [PATCH 05/29] yaml test; begin to add block source --- .../test/search/220_interval_query.yml | 24 --------- .../test/search/230_interval_query.yml | 38 ++++++++++++++ .../index/query/IntervalsSourceProvider.java | 50 ++++++++++++++++++- 3 files changed, 87 insertions(+), 25 deletions(-) delete mode 100644 
rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml deleted file mode 100644 index 78c84f68d2b3e..0000000000000 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/220_interval_query.yml +++ /dev/null @@ -1,24 +0,0 @@ -setup: - - skip: - - version: " - 6.99.99" - - reason: "Implemented in 7.0" - - - do: - indices.create: - index: test - body: - mappings: - test: - properties: - text: - type: text - - do: - index: - index: test - type: test - id: 1 - body: { text: some short words and a stupendously long one } - - - do: - indices.refresh: - index: [test] diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml new file mode 100644 index 0000000000000..da6ea5269123e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -0,0 +1,38 @@ +setup: + - skip: + - version: " - 6.99.99" + - reason: "Implemented in 7.0" + + - do: + indices.create: + index: test + body: + mappings: + test: + properties: + text: + type: text + analyzer: standard + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "test", "_type": "test", "_id": "1"}}' + - '{"text" : "Some like it hot, some like it cold"}' + - '{"index": {"_index": "test", "_type": "test", "_id": "2"}}' + - '{"text" : "Its cold outside, theres no kind of atmosphere"}' + - '{"index": {"_index": "test", "_type": "test", "_id": "3"}}' + - '{"text" : "Baby its cold outside"}' + + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + match: "cold outside" + ordered: true + - match: + count: { hits.total: 
2 } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 1be03805d1432..119a481d51da1 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -7,7 +7,6 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.search.MatchesIterator; -import org.apache.lucene.search.MatchesUtils; import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.intervals.Intervals; import org.apache.lucene.search.intervals.IntervalsSource; @@ -247,6 +246,55 @@ public static Match fromXContent(XContentParser parser) throws IOException { } } + public static class Block extends IntervalsSourceProvider { + + public static final String NAME = "block"; + + private final List subSources; + + public Block(List subSources) { + this.subSources = subSources; + } + + public Block(StreamInput in) throws IOException { + this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); + } + + @Override + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List sources = new ArrayList<>(); + for (IntervalsSourceProvider provider : subSources) { + sources.add(provider.getSource(fieldType)); + } + return Intervals.phrase(sources.toArray(new IntervalsSource[0])); + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object other) { + return false; + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeNamedWriteableList(subSources); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + return null; + } + } + public 
static class Combine extends IntervalsSourceProvider { public static final String NAME = "combine"; From a8806e22672625fbb8075f53a8320066685bba1d Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 2 Oct 2018 15:43:41 +0100 Subject: [PATCH 06/29] Add block; make disjunction its own source --- .../index/query/IntervalsSourceProvider.java | 101 ++++++++++++++++-- .../elasticsearch/search/SearchModule.java | 2 + 2 files changed, 95 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 119a481d51da1..7d0760c9897f7 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -67,6 +67,69 @@ public static IntervalsSourceProvider fromXContent(XContentParser parser) throws return provider; } + public static IntervalsSource disjunction(List subSources) { + List rewritten = new ArrayList<>(); + for (IntervalsSource source : subSources) { + if (source instanceof DisjunctionIntervalsSource) { + rewritten.addAll(((DisjunctionIntervalsSource) source).subSources); + } + else { + rewritten.add(source); + } + } + if (rewritten.size() == 0) { + return NO_INTERVALS; + } + if (rewritten.size() == 1) { + return rewritten.get(0); + } + return new DisjunctionIntervalsSource(rewritten); + } + + public static class DisjunctionIntervalsSource extends IntervalsSource { + + private final List subSources; + private final IntervalsSource delegate; + + public DisjunctionIntervalsSource(List subSources) { + this.subSources = subSources; + this.delegate = Intervals.or(subSources.toArray(new IntervalsSource[]{})); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return delegate.intervals(field, ctx); + } + + @Override + public MatchesIterator matches(String 
field, LeafReaderContext ctx, int doc) throws IOException { + return delegate.matches(field, ctx, doc); + } + + @Override + public void extractTerms(String field, Set terms) { + delegate.extractTerms(field, terms); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; + return Objects.equals(delegate, that.delegate); + } + + @Override + public int hashCode() { + return Objects.hash(delegate); + } + + @Override + public String toString() { + return delegate.toString(); + } + } + public static final IntervalsSource NO_INTERVALS = new IntervalsSource() { @Override @@ -246,17 +309,17 @@ public static Match fromXContent(XContentParser parser) throws IOException { } } - public static class Block extends IntervalsSourceProvider { + public static class Disjunction extends IntervalsSourceProvider { - public static final String NAME = "block"; + public static final String NAME = "or"; private final List subSources; - public Block(List subSources) { + public Disjunction(List subSources) { this.subSources = subSources; } - public Block(StreamInput in) throws IOException { + public Disjunction(StreamInput in) throws IOException { this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); } @@ -266,7 +329,7 @@ public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { for (IntervalsSourceProvider provider : subSources) { sources.add(provider.getSource(fieldType)); } - return Intervals.phrase(sources.toArray(new IntervalsSource[0])); + return disjunction(sources); } @Override @@ -291,7 +354,29 @@ public void writeTo(StreamOutput out) throws IOException { @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - return null; + builder.startObject(); + builder.startObject(NAME); + builder.startArray("sources"); + for 
(IntervalsSourceProvider provider : subSources) { + provider.toXContent(builder, params); + } + builder.endArray(); + builder.endObject(); + return builder.endObject(); + } + + @SuppressWarnings("unchecked") + static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + List subSources = (List)args[0]; + return new Disjunction(subSources); + }); + static { + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("sources")); + } + + public static Disjunction fromXContent(XContentParser parser) { + return PARSER.apply(parser, null); } } @@ -310,10 +395,10 @@ IntervalsSource getSource(List subSources) { IntervalsSource getSource(List subSources) { return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); } - }, OR { + }, BLOCK { @Override IntervalsSource getSource(List subSources) { - return Intervals.or(subSources.toArray(new IntervalsSource[0])); + return Intervals.phrase(subSources.toArray(new IntervalsSource[0])); } }; diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index f78d0704437b9..fc512fd26815f 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -807,6 +807,8 @@ private void registerQueryParsers(List plugins) { private void registerIntervalsSourceProviders(List plugins) { registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Match.NAME, IntervalsSourceProvider.Match::new, IntervalsSourceProvider.Match::fromXContent)); + registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Disjunction.NAME, + IntervalsSourceProvider.Disjunction::new, IntervalsSourceProvider.Disjunction::fromXContent)); registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Combine.NAME, 
IntervalsSourceProvider.Combine::new, IntervalsSourceProvider.Combine::fromXContent)); registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Relate.NAME, From 8489e864412d5ba76a0af99111c319c0a77d2b81 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 12 Nov 2018 10:12:47 +0000 Subject: [PATCH 07/29] WIP --- .../index/query/ESIntervalsSource.java | 423 ++++++++++++++++++ .../index/query/IntervalsSourceProvider.java | 275 +++--------- 2 files changed, 487 insertions(+), 211 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java diff --git a/server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java b/server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java new file mode 100644 index 0000000000000..43e4abe0c41a3 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java @@ -0,0 +1,423 @@ +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchesIterator; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.util.BytesRef; +import 
org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Set; + +public abstract class ESIntervalsSource extends IntervalsSource { + + public abstract int length(); + + public abstract List subSources(); + + public static abstract class DelegatingIntervalsSource extends ESIntervalsSource { + + protected final IntervalsSource delegate; + + protected DelegatingIntervalsSource(IntervalsSource delegate) { + this.delegate = delegate; + } + + @Override + public final IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { + return delegate.intervals(field, ctx); + } + + @Override + public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException { + return delegate.matches(field, ctx, doc); + } + + @Override + public final void extractTerms(String field, Set terms) { + delegate.extractTerms(field, terms); + } + + @Override + public final String toString() { + return delegate.toString(); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; + return Objects.equals(delegate, that.delegate); + } + + @Override + public int hashCode() { + return Objects.hash(delegate); + } + } + + public static ESIntervalsSource disjunction(List subSources) { + if (subSources.size() == 0) { + return NO_INTERVALS; + } + if (subSources.size() == 1) { + return subSources.get(1); + } + return new DisjunctionIntervalsSource(subSources); + } + + private static class DisjunctionIntervalsSource extends DelegatingIntervalsSource { + + private final List subSources; + + public DisjunctionIntervalsSource(List subSources) { + super(Intervals.or(subSources.toArray(new 
IntervalsSource[0]))); + this.subSources = new ArrayList<>(subSources); + } + + @Override + public int length() { + int length = subSources.get(0).length(); + for (int i = 1; i < subSources.size(); i++) { + if (subSources.get(i).length() != length) { + return -1; + } + } + return length; + } + + @Override + public List subSources() { + return subSources; + } + } + + public static class TermIntervalsSource extends DelegatingIntervalsSource { + + public TermIntervalsSource(BytesRef term) { + super(Intervals.term(term)); + } + + @Override + public int length() { + return 1; + } + + @Override + public List subSources() { + return Collections.singletonList(this); + } + } + + public static class PhraseIntervalsSource extends DelegatingIntervalsSource { + + final int length; + final List subSources = new ArrayList<>(); + + public PhraseIntervalsSource(List terms) { + this(terms.toArray(new ESIntervalsSource[0])); + } + + public PhraseIntervalsSource(ESIntervalsSource... terms) { + super(Intervals.phrase(terms)); + subSources.addAll(Arrays.asList(terms)); + int length = 0; + for (ESIntervalsSource term : terms) { + if (term.length() > 0) { + length += term.length(); + } + else { + this.length = -1; + return; + } + } + this.length = length; + } + + @Override + public int length() { + return length; + } + + @Override + public List subSources() { + return subSources; + } + } + + public static ESIntervalsSource maxwidth(int maxWidth, ESIntervalsSource subSource) { + if (subSource.length() < 0) { + return new MaxWidthIntervalsSource(maxWidth, subSource); + } + if (subSource.length() > maxWidth) { + return NO_INTERVALS; + } + return subSource; + } + + public static class MaxWidthIntervalsSource extends DelegatingIntervalsSource { + + final ESIntervalsSource delegate; + + public MaxWidthIntervalsSource(int maxWidth, ESIntervalsSource delegate) { + super(Intervals.maxwidth(maxWidth, delegate)); + this.delegate = delegate; + } + + @Override + public int length() { + return 
delegate.length(); + } + + @Override + public List subSources() { + return delegate.subSources(); + } + } + + public static List analyzeQuery(String field, String query, Analyzer analyzer) throws IOException { + try (TokenStream ts = analyzer.tokenStream(field, query); + CachingTokenFilter stream = new CachingTokenFilter(ts)) { + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); + + if (termAtt == null) { + return Collections.singletonList(NO_INTERVALS); + } + + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. + + int numTokens = 0; + boolean hasSynonyms = false; + boolean isGraph = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement == 0) { + hasSynonyms = true; + } + int positionLength = posLenAtt.getPositionLength(); + if (positionLength > 1) { + isGraph = true; + } + } + + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. 
+ + if (numTokens == 0) { + return null; + } else if (numTokens == 1) { + // single term + return Collections.singletonList(analyzeTerm(stream)); + } else if (isGraph) { + // graph + return analyzeGraph(stream); + } else { + // phrase + if (hasSynonyms) { + // phrase with single-term synonyms + return analyzeSynonyms(stream); + } else { + // simple phrase + return analyzeTerms(stream); + } + } + } + } + + public static ESIntervalsSource analyzeTerm(TokenStream ts) throws IOException { + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + ts.incrementToken(); + return new TermIntervalsSource(bytesAtt.getBytesRef()); + } + + public static List analyzeTerms(TokenStream ts) throws IOException { + List terms = new ArrayList<>(); + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + BytesRef term = bytesAtt.getBytesRef(); + terms.add(new TermIntervalsSource(BytesRef.deepCopyOf(term))); + } + ts.end(); + return terms; + } + + public static List analyzeSynonyms(TokenStream ts) throws IOException { + List terms = new ArrayList<>(); + List synonyms = new ArrayList<>(); + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + if (posAtt.getPositionIncrement() == 1) { + if (synonyms.size() == 1) { + terms.add(synonyms.get(0)); + } + if (synonyms.size() > 1) { + terms.add(new DisjunctionIntervalsSource(synonyms)); + } + synonyms.clear(); + } + synonyms.add(new TermIntervalsSource(bytesAtt.getBytesRef())); + } + if (synonyms.size() == 1) { + terms.add(synonyms.get(0)); + } + else { + terms.add(new DisjunctionIntervalsSource(synonyms)); + } + return terms; + } + + public static List analyzeGraph(TokenStream source) throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graph = 
new GraphTokenStreamFiniteStrings(source); + + List clauses = new ArrayList<>(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + int maxClauseCount = BooleanQuery.getMaxClauseCount(); + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + if (graph.hasSidePath(start)) { + List paths = new ArrayList<>(); + Iterator it = graph.getFiniteStrings(start, end); + while (it.hasNext()) { + TokenStream ts = it.next(); + ESIntervalsSource phrase = new PhraseIntervalsSource(analyzeTerms(ts)); + if (paths.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + paths.add(phrase); + } + if (paths.size() > 0) { + clauses.add(new DisjunctionIntervalsSource(paths)); + } + } else { + Iterator it = graph.getFiniteStrings(start, end); + TokenStream ts = it.next(); + clauses.addAll(analyzeTerms(ts)); + assert it.hasNext() == false; + } + } + return clauses; + } + + public static final ESIntervalsSource NO_INTERVALS = new ESIntervalsSource() { + + @Override + public int length() { + return 0; + } + + @Override + public List subSources() { + return Collections.emptyList(); + } + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) { + return new IntervalIterator() { + @Override + public int start() { + return NO_MORE_INTERVALS; + } + + @Override + public int end() { + return NO_MORE_INTERVALS; + } + + @Override + public int nextInterval() { + return NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + + @Override + public int docID() { + return NO_MORE_DOCS; + } + + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 0; + } + }; + } + + @Override + public MatchesIterator matches(String field, LeafReaderContext 
ctx, int doc) { + return null; + } + + @Override + public void extractTerms(String field, Set terms) { + + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public String toString() { + return "no_match"; + } + }; + +} diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 7d0760c9897f7..9ce0dd8c5860d 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -1,8 +1,6 @@ package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.Term; @@ -10,13 +8,13 @@ import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.intervals.Intervals; import org.apache.lucene.search.intervals.IntervalsSource; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; +import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; @@ -34,7 +32,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentObject { - public abstract 
IntervalsSource getSource(MappedFieldType fieldType) throws IOException; + public abstract ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException; @Override public abstract int hashCode(); @@ -67,192 +65,69 @@ public static IntervalsSourceProvider fromXContent(XContentParser parser) throws return provider; } - public static IntervalsSource disjunction(List subSources) { - List rewritten = new ArrayList<>(); - for (IntervalsSource source : subSources) { - if (source instanceof DisjunctionIntervalsSource) { - rewritten.addAll(((DisjunctionIntervalsSource) source).subSources); + public enum Type { + + ORDERED { + @Override + public ESIntervalsSource source(List subSources) { + return null; } - else { - rewritten.add(source); + }, + UNORDERED { + @Override + public ESIntervalsSource source(List subSources) { + return null; } - } - if (rewritten.size() == 0) { - return NO_INTERVALS; - } - if (rewritten.size() == 1) { - return rewritten.get(0); - } - return new DisjunctionIntervalsSource(rewritten); - } - - public static class DisjunctionIntervalsSource extends IntervalsSource { - - private final List subSources; - private final IntervalsSource delegate; - - public DisjunctionIntervalsSource(List subSources) { - this.subSources = subSources; - this.delegate = Intervals.or(subSources.toArray(new IntervalsSource[]{})); - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - return delegate.intervals(field, ctx); - } - - @Override - public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException { - return delegate.matches(field, ctx, doc); - } - - @Override - public void extractTerms(String field, Set terms) { - delegate.extractTerms(field, terms); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; - return 
Objects.equals(delegate, that.delegate); - } + }, + BLOCK { + @Override + public ESIntervalsSource source(List subSources) { + return PHRASE.source(subSources); + } + }, + PHRASE { + @Override + public ESIntervalsSource source(List subSources) { + return new ESIntervalsSource.PhraseIntervalsSource(subSources); + } + } ; - @Override - public int hashCode() { - return Objects.hash(delegate); - } + public abstract ESIntervalsSource source(List subSources); - @Override - public String toString() { - return delegate.toString(); - } } - public static final IntervalsSource NO_INTERVALS = new IntervalsSource() { - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) { - return new IntervalIterator() { - @Override - public int start() { - return NO_MORE_INTERVALS; - } - - @Override - public int end() { - return NO_MORE_INTERVALS; - } - - @Override - public int nextInterval() { - return NO_MORE_INTERVALS; - } - - @Override - public float matchCost() { - return 0; - } - - @Override - public int docID() { - return NO_MORE_DOCS; - } - - @Override - public int nextDoc() { - return NO_MORE_DOCS; - } - - @Override - public int advance(int target) { - return NO_MORE_DOCS; - } - - @Override - public long cost() { - return 0; - } - }; - } - - @Override - public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) { - return null; - } - - @Override - public void extractTerms(String field, Set terms) { - - } - - @Override - public int hashCode() { - return 0; - } - - @Override - public boolean equals(Object other) { - return other == this; - } - - @Override - public String toString() { - return "no_match"; - } - }; - public static class Match extends IntervalsSourceProvider { public static final String NAME = "match"; private final String text; private final int maxWidth; - private final boolean ordered; + private final Type type; - public Match(String text, int maxWidth, boolean ordered) { + public Match(String text, int maxWidth, Type 
type) { this.text = text; this.maxWidth = maxWidth; - this.ordered = ordered; + this.type = type; } public Match(StreamInput in) throws IOException { this.text = in.readString(); this.maxWidth = in.readInt(); - this.ordered = in.readBoolean(); + this.type = in.readEnum(Type.class); } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { - List subSources = new ArrayList<>(); + public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { throw new IllegalArgumentException("Cannot create source against field [" + fieldType.name() + "] with no positions indexed"); } Analyzer analyzer = fieldType.searchAnalyzer(); - try (TokenStream ts = analyzer.tokenStream(fieldType.name(), text)) { - // TODO synonyms -> run through GraphTokenStreamFiniteStrings? - TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - BytesRef term = bytesAtt.getBytesRef(); - subSources.add(Intervals.term(BytesRef.deepCopyOf(term))); - } - ts.end(); - } - if (subSources.size() == 0) { - return NO_INTERVALS; - } - if (subSources.size() == 1) { - return subSources.get(0); - } - IntervalsSource source = ordered ? 
- Intervals.ordered(subSources.toArray(new IntervalsSource[]{})) : - Intervals.unordered(subSources.toArray(new IntervalsSource[]{})); + List subSources = ESIntervalsSource.analyzeQuery(fieldType.name(), text, analyzer); + ESIntervalsSource combination = type.source(subSources); if (maxWidth != Integer.MAX_VALUE) { - return Intervals.maxwidth(maxWidth, source); + return ESIntervalsSource.maxwidth(maxWidth, combination); } - return source; + return combination; } @Override @@ -261,12 +136,12 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; Match match = (Match) o; return Objects.equals(text, match.text) && Objects.equals(maxWidth, match.maxWidth) - && Objects.equals(ordered, match.ordered); + && Objects.equals(type, match.type); } @Override public int hashCode() { - return Objects.hash(text, maxWidth, ordered); + return Objects.hash(text, maxWidth, type); } @Override @@ -278,7 +153,7 @@ public String getWriteableName() { public void writeTo(StreamOutput out) throws IOException { out.writeString(text); out.writeInt(maxWidth); - out.writeBoolean(ordered); + out.writeEnum(type); } @Override @@ -287,7 +162,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(NAME); builder.field("text", text); builder.field("max_width", maxWidth); - builder.field("ordered", ordered); + builder.field("type", type.toString()); return builder.endObject().endObject(); } @@ -295,13 +170,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws args -> { String text = (String) args[0]; int max_width = (args[1] == null ? Integer.MAX_VALUE : (Integer) args[1]); - boolean ordered = (args[2] == null ? false : (Boolean) args[2]); - return new Match(text, max_width, ordered); + Type type = (args[2] == null ? 
Type.ORDERED : (Type) args[2]); + return new Match(text, max_width, type); }); static { PARSER.declareString(constructorArg(), new ParseField("text")); PARSER.declareInt(optionalConstructorArg(), new ParseField("max_width")); - PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); + PARSER.declareField(optionalConstructorArg(), (p, c) -> Type.valueOf(p.text()), + new ParseField("type"), ObjectParser.ValueType.STRING); } public static Match fromXContent(XContentParser parser) throws IOException { @@ -324,12 +200,12 @@ public Disjunction(StreamInput in) throws IOException { } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { - List sources = new ArrayList<>(); + public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List sources = new ArrayList<>(); for (IntervalsSourceProvider provider : subSources) { sources.add(provider.getSource(fieldType)); } - return disjunction(sources); + return ESIntervalsSource.disjunction(sources); } @Override @@ -365,18 +241,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder.endObject(); } - @SuppressWarnings("unchecked") - static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, - args -> { - List subSources = (List)args[0]; - return new Disjunction(subSources); - }); - static { - PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("sources")); - } - - public static Disjunction fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); + public static Disjunction fromXContent(XContentParser parser) throws IOException { + if (parser.nextToken() != XContentParser.Token.START_ARRAY) { + throw new ParsingException(parser.getTokenLocation(), "Expected start array"); + } + List subSources = new ArrayList<>(); + do { + subSources.add(IntervalsSourceProvider.fromXContent(parser)); + } + while 
(parser.nextToken() != XContentParser.Token.END_ARRAY); + return new Disjunction(subSources); } } @@ -384,27 +258,6 @@ public static class Combine extends IntervalsSourceProvider { public static final String NAME = "combine"; - protected enum Type { - ORDERED { - @Override - IntervalsSource getSource(List subSources) { - return Intervals.ordered(subSources.toArray(new IntervalsSource[0])); - } - }, UNORDERED { - @Override - IntervalsSource getSource(List subSources) { - return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); - } - }, BLOCK { - @Override - IntervalsSource getSource(List subSources) { - return Intervals.phrase(subSources.toArray(new IntervalsSource[0])); - } - }; - - abstract IntervalsSource getSource(List subSources); - } - private final List subSources; private final Type type; private final int maxWidth; @@ -422,16 +275,16 @@ public Combine(StreamInput in) throws IOException { } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { - List ss = new ArrayList<>(); + public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List ss = new ArrayList<>(); for (IntervalsSourceProvider provider : subSources) { ss.add(provider.getSource(fieldType)); } - IntervalsSource source = type.getSource(ss); + ESIntervalsSource source = type.source(ss); if (maxWidth == Integer.MAX_VALUE) { return source; } - return Intervals.maxwidth(maxWidth, source); + return ESIntervalsSource.maxwidth(maxWidth, source); } @Override @@ -501,7 +354,7 @@ public static class Relate extends IntervalsSourceProvider { public enum Relation { CONTAINING { @Override - IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + ESIntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { return Intervals.containing(source, filter); } }, NOT_CONTAINING { @@ -525,7 +378,7 @@ IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { return 
Intervals.nonOverlapping(source, filter); } }; - abstract IntervalsSource getSource(IntervalsSource source, IntervalsSource filter); + abstract ESIntervalsSource getSource(ESIntervalsSource source, ESIntervalsSource filter); } private final IntervalsSourceProvider source; @@ -545,7 +398,7 @@ public Relate(StreamInput in) throws IOException { } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { IntervalsSource s = source.getSource(fieldType); IntervalsSource f = filter.getSource(fieldType); return relation.getSource(s, f); From 2a2244d7b7f7bdbf38ce1f41297444b7ad91ddf8 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sat, 1 Dec 2018 17:41:33 +0000 Subject: [PATCH 08/29] Extract IntervalBuilder and add tests for it --- .../index/mapper/MappedFieldType.java | 11 + .../index/mapper/TextFieldMapper.java | 11 + .../index/query/ESIntervalsSource.java | 423 ------------------ .../index/query/IntervalBuilder.java | 295 ++++++++++++ .../index/query/IntervalsSourceProvider.java | 105 ++--- .../index/query/IntervalBuilderTests.java | 142 ++++++ .../query/IntervalQueryBuilderTests.java | 29 +- 7 files changed, 529 insertions(+), 487 deletions(-) delete mode 100644 server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java create mode 100644 server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java create mode 100644 server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index eaafeefa7e0dd..6d093000b2e92 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -34,6 +34,7 @@ import org.apache.lucene.search.Query; import 
org.apache.lucene.search.TermInSetQuery; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.intervals.IntervalsSource; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.Nullable; @@ -374,6 +375,15 @@ public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolea + "] which is of type [" + typeName() + "]"); } + public enum IntervalType { + ORDERED, UNORDERED, PHRASE + } + + public IntervalsSource intervals(String text, IntervalType type) throws IOException { + throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + + "] which is of type [" + typeName() + "]"); + } + /** * An enum used to describe the relation between the range of terms in a * shard when compared with a query range @@ -465,4 +475,5 @@ public static Term extractTerm(Query termQuery) { } return ((TermQuery) termQuery).getTerm(); } + } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index d0419a0e44b24..1fc8a064c2ecd 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -39,6 +39,7 @@ import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.intervals.IntervalsSource; import org.elasticsearch.Version; import org.elasticsearch.common.collect.Iterators; import org.elasticsearch.common.settings.Settings; @@ -48,6 +49,7 @@ import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData; +import org.elasticsearch.index.query.IntervalBuilder; import org.elasticsearch.index.query.QueryShardContext; import 
java.io.IOException; @@ -579,6 +581,15 @@ public Query existsQuery(QueryShardContext context) { } } + @Override + public IntervalsSource intervals(String text, IntervalType type) throws IOException { + if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + throw new IllegalArgumentException("Cannot create source against field [" + name() + "] with no positions indexed"); + } + IntervalBuilder builder = new IntervalBuilder(name(), searchAnalyzer()); + return builder.analyzeText(text, type); + } + @Override public Query phraseQuery(String field, TokenStream stream, int slop, boolean enablePosIncrements) throws IOException { diff --git a/server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java b/server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java deleted file mode 100644 index 43e4abe0c41a3..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/query/ESIntervalsSource.java +++ /dev/null @@ -1,423 +0,0 @@ -package org.elasticsearch.index.query; - -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.CachingTokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; -import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.MatchesIterator; -import org.apache.lucene.search.intervals.IntervalIterator; -import org.apache.lucene.search.intervals.Intervals; -import org.apache.lucene.search.intervals.IntervalsSource; -import org.apache.lucene.search.spans.SpanNearQuery; -import org.apache.lucene.search.spans.SpanOrQuery; -import org.apache.lucene.search.spans.SpanQuery; -import org.apache.lucene.search.spans.SpanTermQuery; -import 
org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.Iterator; -import java.util.List; -import java.util.Objects; -import java.util.Set; - -public abstract class ESIntervalsSource extends IntervalsSource { - - public abstract int length(); - - public abstract List subSources(); - - public static abstract class DelegatingIntervalsSource extends ESIntervalsSource { - - protected final IntervalsSource delegate; - - protected DelegatingIntervalsSource(IntervalsSource delegate) { - this.delegate = delegate; - } - - @Override - public final IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException { - return delegate.intervals(field, ctx); - } - - @Override - public final MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException { - return delegate.matches(field, ctx, doc); - } - - @Override - public final void extractTerms(String field, Set terms) { - delegate.extractTerms(field, terms); - } - - @Override - public final String toString() { - return delegate.toString(); - } - - @Override - public boolean equals(Object o) { - if (this == o) return true; - if (o == null || getClass() != o.getClass()) return false; - DisjunctionIntervalsSource that = (DisjunctionIntervalsSource) o; - return Objects.equals(delegate, that.delegate); - } - - @Override - public int hashCode() { - return Objects.hash(delegate); - } - } - - public static ESIntervalsSource disjunction(List subSources) { - if (subSources.size() == 0) { - return NO_INTERVALS; - } - if (subSources.size() == 1) { - return subSources.get(1); - } - return new DisjunctionIntervalsSource(subSources); - } - - private static class DisjunctionIntervalsSource extends DelegatingIntervalsSource { - - private final List subSources; - - public DisjunctionIntervalsSource(List subSources) { - 
super(Intervals.or(subSources.toArray(new IntervalsSource[0]))); - this.subSources = new ArrayList<>(subSources); - } - - @Override - public int length() { - int length = subSources.get(0).length(); - for (int i = 1; i < subSources.size(); i++) { - if (subSources.get(i).length() != length) { - return -1; - } - } - return length; - } - - @Override - public List subSources() { - return subSources; - } - } - - public static class TermIntervalsSource extends DelegatingIntervalsSource { - - public TermIntervalsSource(BytesRef term) { - super(Intervals.term(term)); - } - - @Override - public int length() { - return 1; - } - - @Override - public List subSources() { - return Collections.singletonList(this); - } - } - - public static class PhraseIntervalsSource extends DelegatingIntervalsSource { - - final int length; - final List subSources = new ArrayList<>(); - - public PhraseIntervalsSource(List terms) { - this(terms.toArray(new ESIntervalsSource[0])); - } - - public PhraseIntervalsSource(ESIntervalsSource... 
terms) { - super(Intervals.phrase(terms)); - subSources.addAll(Arrays.asList(terms)); - int length = 0; - for (ESIntervalsSource term : terms) { - if (term.length() > 0) { - length += term.length(); - } - else { - this.length = -1; - return; - } - } - this.length = length; - } - - @Override - public int length() { - return length; - } - - @Override - public List subSources() { - return subSources; - } - } - - public static ESIntervalsSource maxwidth(int maxWidth, ESIntervalsSource subSource) { - if (subSource.length() < 0) { - return new MaxWidthIntervalsSource(maxWidth, subSource); - } - if (subSource.length() > maxWidth) { - return NO_INTERVALS; - } - return subSource; - } - - public static class MaxWidthIntervalsSource extends DelegatingIntervalsSource { - - final ESIntervalsSource delegate; - - public MaxWidthIntervalsSource(int maxWidth, ESIntervalsSource delegate) { - super(Intervals.maxwidth(maxWidth, delegate)); - this.delegate = delegate; - } - - @Override - public int length() { - return delegate.length(); - } - - @Override - public List subSources() { - return delegate.subSources(); - } - } - - public static List analyzeQuery(String field, String query, Analyzer analyzer) throws IOException { - try (TokenStream ts = analyzer.tokenStream(field, query); - CachingTokenFilter stream = new CachingTokenFilter(ts)) { - - TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); - PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); - - if (termAtt == null) { - return Collections.singletonList(NO_INTERVALS); - } - - // phase 1: read through the stream and assess the situation: - // counting the number of tokens/positions and marking if we have any synonyms. 
- - int numTokens = 0; - boolean hasSynonyms = false; - boolean isGraph = false; - - stream.reset(); - while (stream.incrementToken()) { - numTokens++; - int positionIncrement = posIncAtt.getPositionIncrement(); - if (positionIncrement == 0) { - hasSynonyms = true; - } - int positionLength = posLenAtt.getPositionLength(); - if (positionLength > 1) { - isGraph = true; - } - } - - // phase 2: based on token count, presence of synonyms, and options - // formulate a single term, boolean, or phrase. - - if (numTokens == 0) { - return null; - } else if (numTokens == 1) { - // single term - return Collections.singletonList(analyzeTerm(stream)); - } else if (isGraph) { - // graph - return analyzeGraph(stream); - } else { - // phrase - if (hasSynonyms) { - // phrase with single-term synonyms - return analyzeSynonyms(stream); - } else { - // simple phrase - return analyzeTerms(stream); - } - } - } - } - - public static ESIntervalsSource analyzeTerm(TokenStream ts) throws IOException { - TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); - ts.reset(); - ts.incrementToken(); - return new TermIntervalsSource(bytesAtt.getBytesRef()); - } - - public static List analyzeTerms(TokenStream ts) throws IOException { - List terms = new ArrayList<>(); - TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - BytesRef term = bytesAtt.getBytesRef(); - terms.add(new TermIntervalsSource(BytesRef.deepCopyOf(term))); - } - ts.end(); - return terms; - } - - public static List analyzeSynonyms(TokenStream ts) throws IOException { - List terms = new ArrayList<>(); - List synonyms = new ArrayList<>(); - TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); - PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class); - ts.reset(); - while (ts.incrementToken()) { - if (posAtt.getPositionIncrement() == 1) { - if (synonyms.size() == 1) { - 
terms.add(synonyms.get(0)); - } - if (synonyms.size() > 1) { - terms.add(new DisjunctionIntervalsSource(synonyms)); - } - synonyms.clear(); - } - synonyms.add(new TermIntervalsSource(bytesAtt.getBytesRef())); - } - if (synonyms.size() == 1) { - terms.add(synonyms.get(0)); - } - else { - terms.add(new DisjunctionIntervalsSource(synonyms)); - } - return terms; - } - - public static List analyzeGraph(TokenStream source) throws IOException { - source.reset(); - GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); - - List clauses = new ArrayList<>(); - int[] articulationPoints = graph.articulationPoints(); - int lastState = 0; - int maxClauseCount = BooleanQuery.getMaxClauseCount(); - for (int i = 0; i <= articulationPoints.length; i++) { - int start = lastState; - int end = -1; - if (i < articulationPoints.length) { - end = articulationPoints[i]; - } - lastState = end; - if (graph.hasSidePath(start)) { - List paths = new ArrayList<>(); - Iterator it = graph.getFiniteStrings(start, end); - while (it.hasNext()) { - TokenStream ts = it.next(); - ESIntervalsSource phrase = new PhraseIntervalsSource(analyzeTerms(ts)); - if (paths.size() >= maxClauseCount) { - throw new BooleanQuery.TooManyClauses(); - } - paths.add(phrase); - } - if (paths.size() > 0) { - clauses.add(new DisjunctionIntervalsSource(paths)); - } - } else { - Iterator it = graph.getFiniteStrings(start, end); - TokenStream ts = it.next(); - clauses.addAll(analyzeTerms(ts)); - assert it.hasNext() == false; - } - } - return clauses; - } - - public static final ESIntervalsSource NO_INTERVALS = new ESIntervalsSource() { - - @Override - public int length() { - return 0; - } - - @Override - public List subSources() { - return Collections.emptyList(); - } - - @Override - public IntervalIterator intervals(String field, LeafReaderContext ctx) { - return new IntervalIterator() { - @Override - public int start() { - return NO_MORE_INTERVALS; - } - - @Override - public int end() { - return 
NO_MORE_INTERVALS; - } - - @Override - public int nextInterval() { - return NO_MORE_INTERVALS; - } - - @Override - public float matchCost() { - return 0; - } - - @Override - public int docID() { - return NO_MORE_DOCS; - } - - @Override - public int nextDoc() { - return NO_MORE_DOCS; - } - - @Override - public int advance(int target) { - return NO_MORE_DOCS; - } - - @Override - public long cost() { - return 0; - } - }; - } - - @Override - public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) { - return null; - } - - @Override - public void extractTerms(String field, Set terms) { - - } - - @Override - public int hashCode() { - return 0; - } - - @Override - public boolean equals(Object other) { - return other == this; - } - - @Override - public String toString() { - return "no_match"; - } - }; - -} diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java new file mode 100644 index 0000000000000..e0e88342159f2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -0,0 +1,295 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MatchesIterator; +import org.apache.lucene.search.intervals.IntervalIterator; +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; +import org.elasticsearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +/** + * Constructs an IntervalsSource based on analyzed text + */ +public class IntervalBuilder { + + private final String field; + private final Analyzer analyzer; + + public IntervalBuilder(String field, Analyzer analyzer) { + this.field = field; + this.analyzer = analyzer; + } + + public IntervalsSource analyzeText(String query, MappedFieldType.IntervalType type) throws IOException { + try (TokenStream ts = analyzer.tokenStream(field, query); + CachingTokenFilter stream = new CachingTokenFilter(ts)) { + return analyzeText(stream, type); + } + } + + protected IntervalsSource analyzeText(CachingTokenFilter stream, MappedFieldType.IntervalType type) throws IOException { + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = 
stream.addAttribute(PositionLengthAttribute.class); + + if (termAtt == null) { + return NO_INTERVALS; + } + + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. + + int numTokens = 0; + boolean hasSynonyms = false; + boolean isGraph = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement == 0) { + hasSynonyms = true; + } + int positionLength = posLenAtt.getPositionLength(); + if (positionLength > 1) { + isGraph = true; + } + } + + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. + + if (numTokens == 0) { + return null; + } else if (numTokens == 1) { + // single term + return analyzeTerm(stream); + } else if (isGraph) { + // graph + return combineSources(analyzeGraph(stream), type); + } else { + // phrase + if (hasSynonyms) { + // phrase with single-term synonyms + return analyzeSynonyms(stream, type); + } else { + // simple phrase + return combineSources(analyzeTerms(stream), type); + } + } + + } + + protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException { + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + ts.incrementToken(); + return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); + } + + protected IntervalsSource combineSources(List sources, MappedFieldType.IntervalType type) { + if (sources.size() == 0) { + return NO_INTERVALS; + } + if (sources.size() == 1) { + return sources.get(0); + } + switch (type) { + case ORDERED: + return Intervals.ordered(sources.toArray(new IntervalsSource[0])); + case UNORDERED: + return Intervals.unordered(sources.toArray(new IntervalsSource[0])); + case PHRASE: + return Intervals.phrase(sources.toArray(new IntervalsSource[0])); + } + throw new IllegalStateException("Unknown interval 
type [" + type + "]"); + } + + protected List analyzeTerms(TokenStream ts) throws IOException { + List terms = new ArrayList<>(); + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + BytesRef term = bytesAtt.getBytesRef(); + terms.add(Intervals.term(BytesRef.deepCopyOf(term))); + } + ts.end(); + return terms; + } + + protected IntervalsSource analyzeSynonyms(TokenStream ts, MappedFieldType.IntervalType type) throws IOException { + List terms = new ArrayList<>(); + List synonyms = new ArrayList<>(); + TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posAtt = ts.addAttribute(PositionIncrementAttribute.class); + ts.reset(); + while (ts.incrementToken()) { + if (posAtt.getPositionIncrement() == 1) { + if (synonyms.size() == 1) { + terms.add(synonyms.get(0)); + } + if (synonyms.size() > 1) { + terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); + } + synonyms.clear(); + } + synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()))); + } + if (synonyms.size() == 1) { + terms.add(synonyms.get(0)); + } + else { + terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); + } + return combineSources(terms, type); + } + + protected List analyzeGraph(TokenStream source) throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source); + + List clauses = new ArrayList<>(); + int[] articulationPoints = graph.articulationPoints(); + int lastState = 0; + int maxClauseCount = BooleanQuery.getMaxClauseCount(); + for (int i = 0; i <= articulationPoints.length; i++) { + int start = lastState; + int end = -1; + if (i < articulationPoints.length) { + end = articulationPoints[i]; + } + lastState = end; + if (graph.hasSidePath(start)) { + List paths = new ArrayList<>(); + Iterator it = graph.getFiniteStrings(start, end); + while (it.hasNext()) { + 
TokenStream ts = it.next(); + IntervalsSource phrase = combineSources(analyzeTerms(ts), MappedFieldType.IntervalType.PHRASE); + if (paths.size() >= maxClauseCount) { + throw new BooleanQuery.TooManyClauses(); + } + paths.add(phrase); + } + if (paths.size() > 0) { + clauses.add(Intervals.or(paths.toArray(new IntervalsSource[0]))); + } + } else { + Iterator it = graph.getFiniteStrings(start, end); + TokenStream ts = it.next(); + clauses.addAll(analyzeTerms(ts)); + assert it.hasNext() == false; + } + } + return clauses; + } + + public static final IntervalsSource NO_INTERVALS = new IntervalsSource() { + + @Override + public IntervalIterator intervals(String field, LeafReaderContext ctx) { + return new IntervalIterator() { + @Override + public int start() { + return NO_MORE_INTERVALS; + } + + @Override + public int end() { + return NO_MORE_INTERVALS; + } + + @Override + public int nextInterval() { + return NO_MORE_INTERVALS; + } + + @Override + public float matchCost() { + return 0; + } + + @Override + public int docID() { + return NO_MORE_DOCS; + } + + @Override + public int nextDoc() { + return NO_MORE_DOCS; + } + + @Override + public int advance(int target) { + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 0; + } + }; + } + + @Override + public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) { + return null; + } + + @Override + public void extractTerms(String field, Set terms) { + + } + + @Override + public int hashCode() { + return 0; + } + + @Override + public boolean equals(Object other) { + return other == this; + } + + @Override + public String toString() { + return "no_match"; + } + }; + +} diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 9ce0dd8c5860d..c4940ed12b57b 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ 
b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -1,11 +1,5 @@ package org.elasticsearch.index.query; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.index.IndexOptions; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.MatchesIterator; -import org.apache.lucene.search.intervals.IntervalIterator; import org.apache.lucene.search.intervals.Intervals; import org.apache.lucene.search.intervals.IntervalsSource; import org.elasticsearch.common.ParseField; @@ -24,15 +18,15 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; +import java.util.Map; import java.util.Objects; -import java.util.Set; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentObject { - public abstract ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException; + public abstract IntervalsSource getSource(MappedFieldType fieldType) throws IOException; @Override public abstract int hashCode(); @@ -69,30 +63,39 @@ public enum Type { ORDERED { @Override - public ESIntervalsSource source(List subSources) { - return null; + public IntervalsSource source(List subSources) { + if (subSources.size() == 1) { + return subSources.get(0); + } + return Intervals.ordered(subSources.toArray(new IntervalsSource[0])); } }, UNORDERED { @Override - public ESIntervalsSource source(List subSources) { - return null; + public IntervalsSource source(List subSources) { + if (subSources.size() == 1) { + return subSources.get(0); + } + return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); } }, BLOCK { @Override - public ESIntervalsSource source(List subSources) { - return PHRASE.source(subSources); + public IntervalsSource 
source(List subSources) { + if (subSources.size() == 1) { + return subSources.get(0); + } + return Intervals.phrase(subSources.toArray(new IntervalsSource[0])); } }, PHRASE { @Override - public ESIntervalsSource source(List subSources) { - return new ESIntervalsSource.PhraseIntervalsSource(subSources); + public IntervalsSource source(List subSources) { + return BLOCK.source(subSources); } } ; - public abstract ESIntervalsSource source(List subSources); + public abstract IntervalsSource source(List subSources); } @@ -102,9 +105,9 @@ public static class Match extends IntervalsSourceProvider { private final String text; private final int maxWidth; - private final Type type; + private final MappedFieldType.IntervalType type; - public Match(String text, int maxWidth, Type type) { + public Match(String text, int maxWidth, MappedFieldType.IntervalType type) { this.text = text; this.maxWidth = maxWidth; this.type = type; @@ -113,21 +116,16 @@ public Match(String text, int maxWidth, Type type) { public Match(StreamInput in) throws IOException { this.text = in.readString(); this.maxWidth = in.readInt(); - this.type = in.readEnum(Type.class); + this.type = in.readEnum(MappedFieldType.IntervalType.class); } @Override - public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { - if (fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { - throw new IllegalArgumentException("Cannot create source against field [" + fieldType.name() + "] with no positions indexed"); - } - Analyzer analyzer = fieldType.searchAnalyzer(); - List subSources = ESIntervalsSource.analyzeQuery(fieldType.name(), text, analyzer); - ESIntervalsSource combination = type.source(subSources); + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + IntervalsSource source = fieldType.intervals(text, type); if (maxWidth != Integer.MAX_VALUE) { - return ESIntervalsSource.maxwidth(maxWidth, combination); + return 
Intervals.maxwidth(maxWidth, source); } - return combination; + return source; } @Override @@ -162,7 +160,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(NAME); builder.field("text", text); builder.field("max_width", maxWidth); - builder.field("type", type.toString()); + builder.field("type", type.toString().toLowerCase(Locale.ROOT)); return builder.endObject().endObject(); } @@ -170,13 +168,16 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws args -> { String text = (String) args[0]; int max_width = (args[1] == null ? Integer.MAX_VALUE : (Integer) args[1]); - Type type = (args[2] == null ? Type.ORDERED : (Type) args[2]); + MappedFieldType.IntervalType type = (args[2] == null + ? MappedFieldType.IntervalType.UNORDERED + : (MappedFieldType.IntervalType) args[2]); return new Match(text, max_width, type); }); static { PARSER.declareString(constructorArg(), new ParseField("text")); PARSER.declareInt(optionalConstructorArg(), new ParseField("max_width")); - PARSER.declareField(optionalConstructorArg(), (p, c) -> Type.valueOf(p.text()), + PARSER.declareField(optionalConstructorArg(), + (p, c) -> MappedFieldType.IntervalType.valueOf(p.text().toUpperCase(Locale.ROOT)), new ParseField("type"), ObjectParser.ValueType.STRING); } @@ -200,12 +201,12 @@ public Disjunction(StreamInput in) throws IOException { } @Override - public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { - List sources = new ArrayList<>(); + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List sources = new ArrayList<>(); for (IntervalsSourceProvider provider : subSources) { sources.add(provider.getSource(fieldType)); } - return ESIntervalsSource.disjunction(sources); + return Intervals.or(sources.toArray(new IntervalsSource[0])); } @Override @@ -241,16 +242,18 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return 
builder.endObject(); } + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, + args -> { + List subSources = (List)args[0]; + return new Disjunction(subSources); + }); + static { + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("sources")); + } + public static Disjunction fromXContent(XContentParser parser) throws IOException { - if (parser.nextToken() != XContentParser.Token.START_ARRAY) { - throw new ParsingException(parser.getTokenLocation(), "Expected start array"); - } - List subSources = new ArrayList<>(); - do { - subSources.add(IntervalsSourceProvider.fromXContent(parser)); - } - while (parser.nextToken() != XContentParser.Token.END_ARRAY); - return new Disjunction(subSources); + return PARSER.parse(parser, null); } } @@ -275,16 +278,16 @@ public Combine(StreamInput in) throws IOException { } @Override - public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { - List ss = new ArrayList<>(); + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + List ss = new ArrayList<>(); for (IntervalsSourceProvider provider : subSources) { ss.add(provider.getSource(fieldType)); } - ESIntervalsSource source = type.source(ss); + IntervalsSource source = type.source(ss); if (maxWidth == Integer.MAX_VALUE) { return source; } - return ESIntervalsSource.maxwidth(maxWidth, source); + return Intervals.maxwidth(maxWidth, source); } @Override @@ -354,7 +357,7 @@ public static class Relate extends IntervalsSourceProvider { public enum Relation { CONTAINING { @Override - ESIntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { + IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { return Intervals.containing(source, filter); } }, NOT_CONTAINING { @@ -378,7 +381,7 @@ IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { return 
Intervals.nonOverlapping(source, filter); } }; - abstract ESIntervalsSource getSource(ESIntervalsSource source, ESIntervalsSource filter); + abstract IntervalsSource getSource(IntervalsSource source, IntervalsSource filter); } private final IntervalsSourceProvider source; @@ -398,7 +401,7 @@ public Relate(StreamInput in) throws IOException { } @Override - public ESIntervalsSource getSource(MappedFieldType fieldType) throws IOException { + public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { IntervalsSource s = source.getSource(fieldType); IntervalsSource f = filter.getSource(fieldType); return relation.getSource(s, f); diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java new file mode 100644 index 0000000000000..f7bcd13ce3e7c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -0,0 +1,142 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.index.query; + +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.search.intervals.Intervals; +import org.apache.lucene.search.intervals.IntervalsSource; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.test.ESTestCase; + +import java.io.IOException; + +public class IntervalBuilderTests extends ESTestCase { + + private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer()); + + public void testSimpleTerm() throws IOException { + + CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2)); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource expected = Intervals.term("term1"); + + assertEquals(expected, source); + } + + public void testOrdered() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + public void testUnordered() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.UNORDERED); + IntervalsSource expected = Intervals.unordered( + Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + public void testPhrase() 
throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.PHRASE); + IntervalsSource expected = Intervals.phrase( + Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + public void testSimpleSynonyms() throws IOException { + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + new Token("term2", 3, 4), + new Token("term4", 0, 3, 4), + new Token("term3", 5, 6) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.term("term4")), Intervals.term("term3") + ); + + assertEquals(expected, source); + + } + + public void testGraphSynonyms() throws IOException { + + // term1 term2/term3:2 term4 term5 + + Token graphToken = new Token("term2", 3, 4); + graphToken.setPositionLength(2); + + CannedTokenStream ts = new CannedTokenStream( + new Token("term1", 1, 2), + graphToken, + new Token("term3", 0, 3, 4), + new Token("term4", 5, 6), + new Token("term5", 6, 7) + ); + + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource expected = Intervals.ordered( + Intervals.term("term1"), + Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), + Intervals.term("term5") + ); + + assertEquals(expected, source); + + } + +} diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 026602e9036e1..152fe33938c33 100644 --- 
a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -3,6 +3,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.apache.lucene.search.intervals.Intervals; +import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.AbstractQueryTestCase; @@ -45,7 +46,9 @@ private IntervalsSourceProvider createRandomSource() { words.add(randomRealisticUnicodeOfLengthBetween(4, 20)); } String text = String.join(" ", words); - return new IntervalsSourceProvider.Match(text, randomBoolean() ? Integer.MAX_VALUE : randomIntBetween(1, 20), randomBoolean()); + int mtypeOrd = randomInt(MappedFieldType.IntervalType.values().length - 1); + MappedFieldType.IntervalType type = MappedFieldType.IntervalType.values()[mtypeOrd]; + return new IntervalsSourceProvider.Match(text, randomBoolean() ? 
Integer.MAX_VALUE : randomIntBetween(1, 20), type); } } @@ -81,7 +84,7 @@ public void testMatchInterval() throws IOException { "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + " \"source\" : { \"match\" : { " + " \"text\" : \"Hello world\"," + - " \"ordered\" : \"true\" } } } }"; + " \"type\" : \"ordered\" } } } }"; builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))); @@ -89,23 +92,23 @@ public void testMatchInterval() throws IOException { } - public void testCombineInterval() throws IOException { - + public void testOrInterval() throws IOException { String json = "{ \"intervals\" : " + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + " \"source\" : { " + - " \"combine\" : {" + - " \"type\" : \"or\"," + - " \"sources\" : [" + + " \"or\" : {" + + " \"sources\": [" + " { \"match\" : { \"text\" : \"one\" } }," + - " { \"match\" : { \"text\" : \"two\" } } ]," + - " \"max_width\" : 30 } } } }"; + " { \"match\" : { \"text\" : \"two\" } } ] } } } }"; IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); Query expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.maxwidth(30, Intervals.or(Intervals.term("one"), Intervals.term("two")))); + Intervals.or(Intervals.term("one"), Intervals.term("two"))); assertEquals(expected, builder.toQuery(createShardContext())); + } - json = "{ \"intervals\" : " + + public void testCombineInterval() throws IOException { + + String json = "{ \"intervals\" : " + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + " \"source\" : { " + " \"combine\" : {" + @@ -118,8 +121,8 @@ public void testCombineInterval() throws IOException { " { \"match\" : { \"text\" : \"two\" } }," + " { \"match\" : { \"text\" : \"three\" } } ] } } ]," + " \"max_width\" : 30 } } } }"; - builder = (IntervalQueryBuilder) parseQuery(json); - expected = new IntervalQuery(STRING_FIELD_NAME, + IntervalQueryBuilder builder = 
(IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.maxwidth(30, Intervals.ordered( Intervals.term("one"), Intervals.unordered(Intervals.term("two"), Intervals.term("three"))))); From 6e5339dcc2514aec54e1319e9f2e7e6acd903918 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sat, 1 Dec 2018 17:49:08 +0000 Subject: [PATCH 09/29] Fix eq/hashcode in Disjunction --- .../index/query/IntervalsSourceProvider.java | 11 +++++++---- .../index/query/IntervalQueryBuilderTests.java | 6 ++++++ 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index c4940ed12b57b..f85fc116c69b4 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -210,13 +210,16 @@ public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { } @Override - public int hashCode() { - return 0; + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + Disjunction that = (Disjunction) o; + return Objects.equals(subSources, that.subSources); } @Override - public boolean equals(Object other) { - return false; + public int hashCode() { + return Objects.hash(subSources); } @Override diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 152fe33938c33..090c951e86902 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -29,6 +29,12 @@ private IntervalsSourceProvider createRandomSource() { int relOrd = 
randomInt(IntervalsSourceProvider.Relate.Relation.values().length - 1); return new IntervalsSourceProvider.Relate(source1, source2, IntervalsSourceProvider.Relate.Relation.values()[relOrd]); case 1: + int orCount = randomInt(4) + 1; + List orSources = new ArrayList<>(); + for (int i = 0; i < orCount; i++) { + orSources.add(createRandomSource()); + } + return new IntervalsSourceProvider.Disjunction(orSources); case 2: case 3: int count = randomInt(5) + 1; From 52bcf1fe68e28c910885c3773e60e09135731beb Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sat, 1 Dec 2018 17:55:40 +0000 Subject: [PATCH 10/29] New yaml test --- .../test/search/230_interval_query.yml | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index da6ea5269123e..a1c86843f055a 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -22,7 +22,9 @@ setup: - '{"index": {"_index": "test", "_type": "test", "_id": "2"}}' - '{"text" : "Its cold outside, theres no kind of atmosphere"}' - '{"index": {"_index": "test", "_type": "test", "_id": "3"}}' - - '{"text" : "Baby its cold outside"}' + - '{"text" : "Baby its cold there outside"}' + - '{"index": {"_index": "test", "_type": "test", "_id": "4"}}' + - '{"text" : "Outside it is cold and wet"}' - do: search: @@ -36,3 +38,16 @@ setup: ordered: true - match: count: { hits.total: 2 } + + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + match: "cold outside" + ordered: false + - match: + count: { hits.total: 3 } \ No newline at end of file From 6f2c73c3c3096df32d8904c6e2a578c98f0a634f Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 2 Dec 2018 08:32:42 +0000 Subject: [PATCH 11/29] checkstyle --- 
.../index/query/IntervalsSourceProvider.java | 15 +++++++++------ .../index/query/IntervalQueryBuilderTests.java | 3 ++- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index f85fc116c69b4..928a9d93309cc 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -18,7 +18,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Objects; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; @@ -36,25 +35,29 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont public static IntervalsSourceProvider fromXContent(XContentParser parser) throws IOException { if (parser.currentToken() != XContentParser.Token.START_OBJECT) { - throw new ParsingException(parser.getTokenLocation(), "Malformed IntervalsSource definition, expected start_object"); + throw new ParsingException(parser.getTokenLocation(), + "Malformed IntervalsSource definition, expected [start_object] but found [" + parser.currentToken() + "]"); } if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { - throw new ParsingException(parser.getTokenLocation(), "Malformed IntervalsSource definition, no field after start_object"); + throw new ParsingException(parser.getTokenLocation(), + "Malformed IntervalsSource definition, no field after start_object"); } String sourceType = parser.currentName(); if (parser.nextToken() != XContentParser.Token.START_OBJECT) { - throw new ParsingException(parser.getTokenLocation(), "Malformed IntervalsSource definition, expected start_object after source name"); + throw new ParsingException(parser.getTokenLocation(), + "Malformed IntervalsSource definition, 
expected [start_object] after source name but found [" + + parser.currentToken() + "]"); } IntervalsSourceProvider provider = parser.namedObject(IntervalsSourceProvider.class, sourceType, null); //end_object of the specific query (e.g. match, multi_match etc.) element if (parser.currentToken() != XContentParser.Token.END_OBJECT) { throw new ParsingException(parser.getTokenLocation(), - "[" + sourceType + "] malformed source, expected [END_OBJECT] but found [" + parser.currentToken() + "]"); + "[" + sourceType + "] malformed source, expected [end_object] but found [" + parser.currentToken() + "]"); } //end_object of the query object if (parser.nextToken() != XContentParser.Token.END_OBJECT) { throw new ParsingException(parser.getTokenLocation(), - "[" + sourceType + "] malformed source, expected [END_OBJECT] but found [" + parser.currentToken() + "]"); + "[" + sourceType + "] malformed source, expected [end_object] but found [" + parser.currentToken() + "]"); } return provider; } diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 090c951e86902..a673fb23fceec 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -217,6 +217,7 @@ public void testNonIndexedFields() { IntervalQueryBuilder builder = new IntervalQueryBuilder(STRING_FIELD_NAME_2, provider); builder.doToQuery(createShardContext()); }); - assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + STRING_FIELD_NAME_2 + "] with no indexed positions")); + assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + + STRING_FIELD_NAME_2 + "] with no indexed positions")); } } From f044495055a45b785168ac9f0c041d91e1232e9a Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 2 Dec 2018 10:14:06 +0000 Subject: 
[PATCH 12/29] license headers --- .../index/query/IntervalQueryBuilder.java | 19 +++++++++++++++++++ .../index/query/IntervalsSourceProvider.java | 19 +++++++++++++++++++ .../query/IntervalQueryBuilderTests.java | 19 +++++++++++++++++++ 3 files changed, 57 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index 140c2e63d900d..4e42d0cc89c09 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + package org.elasticsearch.index.query; import org.apache.lucene.index.IndexOptions; diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 928a9d93309cc..47bc8a3cf7d6a 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.elasticsearch.index.query; import org.apache.lucene.search.intervals.Intervals; diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index a673fb23fceec..fc9a6c156428d 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -1,3 +1,22 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + package org.elasticsearch.index.query; import org.apache.lucene.search.Query; From 1377bccde2e904ef9ccb4e194c6805e1ada8f8dc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 2 Dec 2018 12:27:29 +0000 Subject: [PATCH 13/29] test fix --- .../test/java/org/elasticsearch/search/SearchModuleTests.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java index cf5b3fc0fc13b..321d50278b8be 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -328,6 +328,7 @@ public List> getRescorers() { "geo_polygon", "geo_shape", "ids", + "intervals", "match", "match_all", "match_none", From 036813367dd5b1d9c1ec7cd30241d913747d150c Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 2 Dec 2018 12:47:54 +0000 Subject: [PATCH 14/29] YAML format --- .../rest-api-spec/test/search/230_interval_query.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index 
a1c86843f055a..1534ec383e7b4 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -50,4 +50,6 @@ setup: match: "cold outside" ordered: false - match: - count: { hits.total: 3 } \ No newline at end of file + count: { hits.total: 3 } + + From 9c2f035d3e60cfcbc243913afe823f6ca9d6b4ba Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Sun, 2 Dec 2018 14:42:23 +0000 Subject: [PATCH 15/29] YAML formatting again --- .../test/search/230_interval_query.yml | 23 +++++++++++-------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index 1534ec383e7b4..414b00d2489e7 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -1,7 +1,7 @@ setup: - skip: - - version: " - 6.99.99" - - reason: "Implemented in 7.0" + version: " - 6.99.99" + reason: "Implemented in 7.0" - do: indices.create: @@ -26,6 +26,8 @@ setup: - '{"index": {"_index": "test", "_type": "test", "_id": "4"}}' - '{"text" : "Outside it is cold and wet"}' +--- +"Test ordered matching": - do: search: index: test @@ -34,11 +36,13 @@ setup: intervals: field: text source: - match: "cold outside" - ordered: true - - match: - count: { hits.total: 2 } + match: + text: "cold outside" + type: ordered + - match: { hits.total: 2 } +--- +"Test default unordered matching": - do: search: index: test @@ -47,9 +51,8 @@ setup: intervals: field: text source: - match: "cold outside" - ordered: false - - match: - count: { hits.total: 3 } + match: + text: "cold outside" + - match: { hits.total: 3 } From 7cde116484722a59c97d8c024251aeb0c7624d40 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 3 Dec 
2018 14:44:31 +0000 Subject: [PATCH 16/29] yaml tests; javadoc --- rest | 5 + .../test/search/230_interval_query.yml | 271 ++++++++++++++++++ .../index/mapper/MappedFieldType.java | 8 +- .../index/query/IntervalBuilder.java | 2 +- .../index/query/IntervalQueryBuilder.java | 3 + .../index/query/IntervalsSourceProvider.java | 91 +++--- .../query/IntervalQueryBuilderTests.java | 4 +- 7 files changed, 336 insertions(+), 48 deletions(-) create mode 100755 rest diff --git a/rest b/rest new file mode 100755 index 0000000000000..c14ab370c47ae --- /dev/null +++ b/rest @@ -0,0 +1,5 @@ +#!/bin/bash + +./gradlew :distribution:archives:integ-test-zip:integTest \ + -Dtests.class="org.elasticsearch.test.rest.*Yaml*IT" \ + -Dtests.method="test {p0=$1}" diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index 414b00d2489e7..96099351f710e 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -55,4 +55,275 @@ setup: text: "cold outside" - match: { hits.total: 3 } +--- +"Test explicit unordered matching": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + match: + text: "cold outside" + type: "unordered" + - match: { hits.total: 3 } + +--- +"Test phrase matching": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + match: + text: "cold outside" + type: "phrase" + - match: { hits.total: 1 } + +--- +"Test unordered max_width matching": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + match: + text: "cold outside" + max_width: 3 + - match: { hits.total: 2 } + +--- +"Test ordered max_width matching": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + match: + text: "cold outside" + 
max_width: 2 + type: ordered + - match: { hits.total: 1 } + +--- +"Test ordered combination with max_width": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + max_width: 2 + type: ordered + - match: { hits.total: 1 } + +--- +"Test ordered combination": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: ordered + - match: { hits.total: 2 } + +--- +"Test unordered combination": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + max_width: 3 + type: unordered + - match: { hits.total: 2 } + +--- +"Test block combination": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: block + - match: { hits.total: 1 } + + +--- +"Test containing": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + relate: + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: unordered + filter: + match: + text: "is" + relation: containing + - match: { hits.total: 1 } + + +--- +"Test not containing": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + relate: + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: unordered + filter: + match: + text: "is" + relation: not_containing + - match: { hits.total: 2 } + +--- +"Test contained_by": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + relate: + source: + match: + text: "is" + filter: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: unordered + relation: 
contained_by + - match: { hits.total: 1 } + +--- +"Test not_contained_by": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + relate: + source: + match: + text: "it" + filter: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: unordered + relation: not_contained_by + - match: { hits.total: 1 } + +--- +"Test not_overlapping": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + relate: + source: + combine: + sources: + - match: + text: "cold" + - match: + text: "outside" + type: ordered + filter: + combine: + sources: + - match: + text: "baby" + - match: + text: "there" + type: unordered + relation: not_overlapping + - match: { hits.total: 1 } + diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index 6d093000b2e92..f616ad26ab546 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -375,10 +375,16 @@ public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolea + "] which is of type [" + typeName() + "]"); } + /** + * Types of proximity matching, decreasing in strictness + */ public enum IntervalType { - ORDERED, UNORDERED, PHRASE + PHRASE, ORDERED, UNORDERED } + /** + * Create an {@link IntervalsSource} to be used for proximity queries + */ public IntervalsSource intervals(String text, IntervalType type) throws IOException { throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index e0e88342159f2..4a2a95107d2e2 100644 --- 
a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -164,7 +164,7 @@ protected IntervalsSource analyzeSynonyms(TokenStream ts, MappedFieldType.Interv if (synonyms.size() == 1) { terms.add(synonyms.get(0)); } - if (synonyms.size() > 1) { + else if (synonyms.size() > 1) { terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); } synonyms.clear(); diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index 4e42d0cc89c09..7aeffa670eeba 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -35,6 +35,9 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; +/** + * Builder for {@link IntervalQuery} + */ public class IntervalQueryBuilder extends AbstractQueryBuilder { public static final String NAME = "intervals"; diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 47bc8a3cf7d6a..6d9c9edaa09f7 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -42,6 +42,15 @@ import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; +/** + * Factory class for {@link IntervalsSource} + * + * Built-in sources include {@link Match}, which analyzes a text string and converts it + * to a proximity source (phrase, ordered or unordered depending on how + * strict the matching should be); {@link Combine}, which allows proximity queries 
+ * between different sub-sources; and {@link Relate}, which allows sources to be filtered + * by their relation to other sources. + */ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentObject { public abstract IntervalsSource getSource(MappedFieldType fieldType) throws IOException; @@ -81,46 +90,6 @@ public static IntervalsSourceProvider fromXContent(XContentParser parser) throws return provider; } - public enum Type { - - ORDERED { - @Override - public IntervalsSource source(List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - return Intervals.ordered(subSources.toArray(new IntervalsSource[0])); - } - }, - UNORDERED { - @Override - public IntervalsSource source(List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); - } - }, - BLOCK { - @Override - public IntervalsSource source(List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - return Intervals.phrase(subSources.toArray(new IntervalsSource[0])); - } - }, - PHRASE { - @Override - public IntervalsSource source(List subSources) { - return BLOCK.source(subSources); - } - } ; - - public abstract IntervalsSource source(List subSources); - - } - public static class Match extends IntervalsSourceProvider { public static final String NAME = "match"; @@ -282,22 +251,56 @@ public static Disjunction fromXContent(XContentParser parser) throws IOException } } + public enum CombineType { + + ORDERED { + @Override + public IntervalsSource source(List subSources) { + if (subSources.size() == 1) { + return subSources.get(0); + } + return Intervals.ordered(subSources.toArray(new IntervalsSource[0])); + } + }, + UNORDERED { + @Override + public IntervalsSource source(List subSources) { + if (subSources.size() == 1) { + return subSources.get(0); + } + return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); + } + }, + BLOCK 
{ + @Override + public IntervalsSource source(List subSources) { + if (subSources.size() == 1) { + return subSources.get(0); + } + return Intervals.phrase(subSources.toArray(new IntervalsSource[0])); + } + }; + + public abstract IntervalsSource source(List subSources); + + } + public static class Combine extends IntervalsSourceProvider { public static final String NAME = "combine"; private final List subSources; - private final Type type; + private final CombineType type; private final int maxWidth; - public Combine(List subSources, Type type, int maxWidth) { + public Combine(List subSources, CombineType type, int maxWidth) { this.subSources = subSources; this.type = type; this.maxWidth = maxWidth; } public Combine(StreamInput in) throws IOException { - this.type = in.readEnum(Type.class); + this.type = in.readEnum(CombineType.class); this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); this.maxWidth = in.readInt(); } @@ -359,7 +362,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws @SuppressWarnings("unchecked") static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, args -> { - Type type = Type.valueOf(((String)args[0]).toUpperCase(Locale.ROOT)); + CombineType type = CombineType.valueOf(((String)args[0]).toUpperCase(Locale.ROOT)); List subSources = (List)args[1]; Integer maxWidth = (args[2] == null ? 
Integer.MAX_VALUE : (Integer)args[2]); return new Combine(subSources, type, maxWidth); diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index fc9a6c156428d..9cb77e8e132d3 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -61,9 +61,9 @@ private IntervalsSourceProvider createRandomSource() { for (int i = 0; i < count; i++) { subSources.add(createRandomSource()); } - int typeOrd = randomInt(IntervalsSourceProvider.Combine.Type.values().length - 1); + int typeOrd = randomInt(IntervalsSourceProvider.CombineType.values().length - 1); int width = randomBoolean() ? Integer.MAX_VALUE : randomIntBetween(count, 100); - return new IntervalsSourceProvider.Combine(subSources, IntervalsSourceProvider.Combine.Type.values()[typeOrd], width); + return new IntervalsSourceProvider.Combine(subSources, IntervalsSourceProvider.CombineType.values()[typeOrd], width); default: int wordCount = randomInt(4) + 1; List words = new ArrayList<>(); From dabdd7789a7e507eee378b07027b7bea183ae29e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Mon, 3 Dec 2018 17:41:35 +0000 Subject: [PATCH 17/29] Add OR test -> requires fix from LUCENE-8586 --- .../test/search/230_interval_query.yml | 23 +++++++++++++++++++ .../query/IntervalQueryBuilderTests.java | 23 +++++++++++++++++++ .../search/query/SearchQueryIT.java | 23 +++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index 96099351f710e..db4dd32b789c5 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -116,6 +116,29 @@ setup: type: ordered - match: { hits.total: 1 } +--- +"Test ordered combination with disjunction": + - do: + search: + index: test + body: + query: + intervals: + field: text + source: + combine: + sources: + - or: + sources: + - match: + text: "cold" + - match: + text: "outside" + - match: + text: "atmosphere" + type: ordered + - match: { hits.total: 1 } + --- "Test ordered combination with max_width": - do: diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 9cb77e8e132d3..399da1a834e99 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -155,6 +155,29 @@ public void testCombineInterval() throws IOException { } + public void testCombineDisjunctionInterval() throws IOException { + String json = "{ \"intervals\" : " + + "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + + " \"source\" : { " + + " \"combine\" : {" + + " \"type\" : \"ordered\"," + + " \"sources\" : [" + + " { \"match\" : { \"text\" : \"atmosphere\" } }," + + " { \"or\" : {" + + " \"sources\" : [" + + " { \"match\" : { \"text\" : \"cold\" } }," + + " { \"match\" : { \"text\" : \"outside\" } } ] } } ]," + + " \"max_width\" : 30 } } } }"; + + IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); + Query expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.maxwidth(30, Intervals.ordered( + Intervals.term("atmosphere"), + Intervals.or(Intervals.term("cold"), Intervals.term("outside")) + ))); + assertEquals(expected, builder.toQuery(createShardContext())); + } + public void testRelateIntervals() throws IOException { String json = "{ \"intervals\" : " + diff --git 
a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 6068f89025994..d58bd7ec6ac6b 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -1291,6 +1291,29 @@ public void testMustNot() throws IOException, ExecutionException, InterruptedExc assertHitCount(searchResponse, 2L); } + public void testIntervals() throws InterruptedException { + createIndex("test"); + + indexRandom(true, + client().prepareIndex("test", "test", "1") + .setSource("description", "it's cold outside, there's no kind of atmosphere")); + + String json = "{ \"intervals\" : " + + "{ \"field\" : \"description\", " + + " \"source\" : { " + + " \"combine\" : {" + + " \"type\" : \"ordered\"," + + " \"sources\" : [" + + " { \"or\" : {" + + " \"sources\" : [" + + " { \"match\" : { \"text\" : \"cold\" } }," + + " { \"match\" : { \"text\" : \"outside\" } } ] } }," + + " { \"match\" : { \"text\" : \"atmosphere\" } } ]," + + " \"max_width\" : 30 } } } }"; + SearchResponse response = client().prepareSearch("test").setQuery(wrapperQuery(json)).get(); + assertHitCount(response, 1L); + } + // see #2994 public void testSimpleSpan() throws IOException, ExecutionException, InterruptedException { createIndex("test"); From 122f192f6900f89db20c358d8c9dee8c9229e295 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 5 Dec 2018 14:09:19 +0000 Subject: [PATCH 18/29] Add docs --- .../query-dsl/full-text-queries.asciidoc | 7 + .../query-dsl/intervals-query.asciidoc | 124 ++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100644 docs/reference/query-dsl/intervals-query.asciidoc diff --git a/docs/reference/query-dsl/full-text-queries.asciidoc b/docs/reference/query-dsl/full-text-queries.asciidoc index aaa0a911372c8..f9714c1be3c8e 100644 --- a/docs/reference/query-dsl/full-text-queries.asciidoc +++ 
b/docs/reference/query-dsl/full-text-queries.asciidoc @@ -40,6 +40,11 @@ The queries in this group are: A simpler, more robust version of the `query_string` syntax suitable for exposing directly to users. +<>:: + + A full text query that allows fine-grained control of the ordering and + proximity of matching terms + include::match-query.asciidoc[] include::match-phrase-query.asciidoc[] @@ -53,3 +58,5 @@ include::common-terms-query.asciidoc[] include::query-string-query.asciidoc[] include::simple-query-string-query.asciidoc[] + +include::intervals-query.asciidoc[] diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc new file mode 100644 index 0000000000000..ad6ed8b6853d9 --- /dev/null +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -0,0 +1,124 @@ +[[intervals-query]] +=== Intervals query + +An `intervals` query allows fine-grained control over the order and proximity of +matching terms. Matching rules are constructed from a small set of `source` +objects, and the rules are then applied to terms from a particular `field`. + +The source definitions produce sequences of intervals that span terms in a +body of text. These intervals can be further combined and filtered by +parent sources. 
+ +The example below will search for the phrase `my favourite food` appearing +before the terms `hot` and `water` or `cold` and `porridge` in any order, in +the field `my_text`. + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "field" : "my_text", + "source" : { + "combine" : { + "type" : "ordered", + "sources" : [ + { "match" : { + "text" : "my favourite food", + "type" : "phrase" + } }, + { "or" : { + "sources" : [ + { "match" : { "text" : "hot water" } }, + { "match" : { "text" : "cold porridge" } } + ] + } } + ] + } + }, + "boost" : 1.0 + } + } +} +-------------------------------------------------- +// CONSOLE + +In the above example, the text `my favourite food is cold porridge` would +match because the two intervals matching `my favourite food` and `cold +porridge` appear in the correct order, but the text `when it's cold my +favourite food is porridge` would not match, because the interval matching +`cold porridge` starts before the interval matching `my favourite food`. + +==== `match` source + +The `match` source matches analyzed text, and takes the following parameters: + +[horizontal] +`text`:: +The text to match. It will be analyzed using the search analyzer configured +on the top-level query's field. +`max_width`:: +Specify a maximum distance between the terms in the text. Terms that appear +further apart than this distance will not match. Note that the terms themselves +are included in the width. If unspecified then there is no width restriction +on the match. +`type`:: +An optional restriction on how the terms in the text appear in the document. +Can be `phrase` (the terms must appear consecutively and in-order), `ordered` +and `unordered` (the default). + +==== `combine` source + +The `combine` source will match subsources that have specific ordering and proximity +relations to each other. When nested, the intervals produced by this source +span all their subsources.
+ +[horizontal] +`sources`:: +An array of sources to combine. All subsources must produce a match in a +document for the overall source to match. +`max_width`:: +Specify a maximum width covered by the matching sources - combinations that +match across a distance greater than this width will not match. Note that the +widths of the subsources are included here, so a combination of `black sheep` +and `yes sir` will have a minimum width of `4`. If unspecified then there is +no width restriction on the match. +`type`:: +An optional restriction on how the subsources in the match appear in the +document. Can be `block` (the subsources must appear consecutively and in-order), +`ordered` and `unordered` (the default). + +==== `or` source + +The `or` source will match any of its nested sub-sources. + +[horizontal] +`sources`:: +An array of sources to match. + +==== `relate` source + +The `relate` source will filter a source by its relation to another source. +The resulting intervals are taken directly from the `source`.
+ +[horizontal] +`source`:: +The source to filter +`filter`:: +The source to filter by +`relation`:: +How the filter should be applied + +The following relations are available: +[horizontal] +`containing`:: +Produces intervals that contain an interval from the filter source +`contained_by`:: +Produces intervals that are contained by an interval from the filter source +`not_containing`:: +Produces intervals that do not contain an interval from the filter source +`not_contained_by`:: +Produces intervals that are not contained by an interval from the filter source +`not_overlapping`:: +Produces intervals that do not overlap with an interval from the filter source \ No newline at end of file From 22f99b46794b0d72cc2457b5324dfc325ce3a4af Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Dec 2018 12:02:26 +0000 Subject: [PATCH 19/29] Re-do API --- .../index/mapper/MappedFieldType.java | 9 +- .../index/mapper/TextFieldMapper.java | 6 +- .../index/query/IntervalBuilder.java | 37 +- .../index/query/IntervalQueryBuilder.java | 60 ++- .../index/query/IntervalsSourceProvider.java | 415 +++++++++--------- .../elasticsearch/plugins/SearchPlugin.java | 20 - .../elasticsearch/search/SearchModule.java | 108 +++-- .../index/query/IntervalBuilderTests.java | 12 +- .../query/IntervalQueryBuilderTests.java | 257 +++++------ 9 files changed, 457 insertions(+), 467 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java index f616ad26ab546..741b2300a4678 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappedFieldType.java @@ -375,17 +375,10 @@ public Query multiPhraseQuery(String field, TokenStream stream, int slop, boolea + "] which is of type [" + typeName() + "]"); } - /** - * Types of proximity matching, decreasing in strictness - */ - public enum IntervalType { - 
PHRASE, ORDERED, UNORDERED - } - /** * Create an {@link IntervalsSource} to be used for proximity queries */ - public IntervalsSource intervals(String text, IntervalType type) throws IOException { + public IntervalsSource intervals(String query, int max_gaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name + "] which is of type [" + typeName() + "]"); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 1fc8a064c2ecd..a5ab1931cdc29 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -582,12 +582,12 @@ public Query existsQuery(QueryShardContext context) { } @Override - public IntervalsSource intervals(String text, IntervalType type) throws IOException { + public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { throw new IllegalArgumentException("Cannot create source against field [" + name() + "] with no positions indexed"); } - IntervalBuilder builder = new IntervalBuilder(name(), searchAnalyzer()); - return builder.analyzeText(text, type); + IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? 
searchAnalyzer() : analyzer); + return builder.analyzeText(text, maxGaps, ordered); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 4a2a95107d2e2..02afab2ec53d1 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -55,14 +55,14 @@ public IntervalBuilder(String field, Analyzer analyzer) { this.analyzer = analyzer; } - public IntervalsSource analyzeText(String query, MappedFieldType.IntervalType type) throws IOException { + public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) throws IOException { try (TokenStream ts = analyzer.tokenStream(field, query); CachingTokenFilter stream = new CachingTokenFilter(ts)) { - return analyzeText(stream, type); + return analyzeText(stream, maxGaps, ordered); } } - protected IntervalsSource analyzeText(CachingTokenFilter stream, MappedFieldType.IntervalType type) throws IOException { + protected IntervalsSource analyzeText(CachingTokenFilter stream, int maxGaps, boolean ordered) throws IOException { TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); @@ -102,15 +102,15 @@ protected IntervalsSource analyzeText(CachingTokenFilter stream, MappedFieldType return analyzeTerm(stream); } else if (isGraph) { // graph - return combineSources(analyzeGraph(stream), type); + return combineSources(analyzeGraph(stream), maxGaps, ordered); } else { // phrase if (hasSynonyms) { // phrase with single-term synonyms - return analyzeSynonyms(stream, type); + return analyzeSynonyms(stream, maxGaps, ordered); } else { // simple phrase - return combineSources(analyzeTerms(stream), type); + return combineSources(analyzeTerms(stream), maxGaps, ordered); } } @@ -123,22 
+123,23 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException { return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); } - protected IntervalsSource combineSources(List sources, MappedFieldType.IntervalType type) { + public static IntervalsSource combineSources(List sources, int maxGaps, boolean ordered) { if (sources.size() == 0) { return NO_INTERVALS; } if (sources.size() == 1) { return sources.get(0); } - switch (type) { - case ORDERED: - return Intervals.ordered(sources.toArray(new IntervalsSource[0])); - case UNORDERED: - return Intervals.unordered(sources.toArray(new IntervalsSource[0])); - case PHRASE: - return Intervals.phrase(sources.toArray(new IntervalsSource[0])); + IntervalsSource[] sourcesArray = sources.toArray(new IntervalsSource[0]); + if (maxGaps == 0 && ordered) { + return Intervals.phrase(sourcesArray); } - throw new IllegalStateException("Unknown interval type [" + type + "]"); + IntervalsSource inner = ordered ? Intervals.ordered(sourcesArray) : Intervals.unordered(sourcesArray); + if (maxGaps == -1) { + return inner; + } + // norelease + return Intervals.maxwidth(maxGaps, inner); // TODO Change this to maxgaps when lucene snapshot upgraded } protected List analyzeTerms(TokenStream ts) throws IOException { @@ -153,7 +154,7 @@ protected List analyzeTerms(TokenStream ts) throws IOException return terms; } - protected IntervalsSource analyzeSynonyms(TokenStream ts, MappedFieldType.IntervalType type) throws IOException { + protected IntervalsSource analyzeSynonyms(TokenStream ts, int maxGaps, boolean ordered) throws IOException { List terms = new ArrayList<>(); List synonyms = new ArrayList<>(); TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class); @@ -177,7 +178,7 @@ else if (synonyms.size() > 1) { else { terms.add(Intervals.or(synonyms.toArray(new IntervalsSource[0]))); } - return combineSources(terms, type); + return combineSources(terms, maxGaps, ordered); } protected List 
analyzeGraph(TokenStream source) throws IOException { @@ -200,7 +201,7 @@ protected List analyzeGraph(TokenStream source) throws IOExcept Iterator it = graph.getFiniteStrings(start, end); while (it.hasNext()) { TokenStream ts = it.next(); - IntervalsSource phrase = combineSources(analyzeTerms(ts), MappedFieldType.IntervalType.PHRASE); + IntervalsSource phrase = combineSources(analyzeTerms(ts), 0, true); if (paths.size() >= maxClauseCount) { throw new BooleanQuery.TooManyClauses(); } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index 7aeffa670eeba..84fa750c34527 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -23,6 +23,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.ConstructingObjectParser; @@ -65,23 +66,56 @@ protected void doWriteTo(StreamOutput out) throws IOException { @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); - builder.field("field", field); - builder.field("source", sourceProvider); + builder.field(field); + builder.startObject(); + sourceProvider.toXContent(builder, params); printBoostAndQueryName(builder); builder.endObject(); - } - - private static final ConstructingObjectParser PARSER - = new ConstructingObjectParser<>(NAME, args -> new IntervalQueryBuilder((String) args[0], (IntervalsSourceProvider) args[1])); - static { - PARSER.declareString(constructorArg(), new ParseField("field")); - PARSER.declareObject(constructorArg(), 
(parser, c) -> IntervalsSourceProvider.fromXContent(parser), new ParseField("source")); - PARSER.declareFloat(IntervalQueryBuilder::boost, new ParseField("boost")); - PARSER.declareString(IntervalQueryBuilder::queryName, new ParseField("_name")); + builder.endObject(); } public static IntervalQueryBuilder fromXContent(XContentParser parser) throws IOException { - return PARSER.apply(parser, null); + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + String field = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [START_OBJECT] but got [" + parser.currentToken() + "]"); + } + String name = null; + float boost = 1; + IntervalsSourceProvider provider = null; + while (parser.nextToken() != XContentParser.Token.END_OBJECT) { + if (parser.currentToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), + "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + switch (parser.currentName()) { + case "_name": + parser.nextToken(); + name = parser.text(); + break; + case "boost": + parser.nextToken(); + boost = parser.floatValue(); + break; + default: + provider = IntervalsSourceProvider.fromXContent(parser); + + } + } + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), + "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + if (provider == null) { + throw new ParsingException(parser.getTokenLocation(), "Missing intervals from interval query definition"); + } + IntervalQueryBuilder builder = new IntervalQueryBuilder(field, provider); + builder.queryName(name); + builder.boost(boost); + return builder; + } @Override @@ -94,7 +128,7 @@ protected Query doToQuery(QueryShardContext context) throws 
IOException { fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { throw new IllegalArgumentException("Cannot create IntervalQuery over field [" + field + "] with no indexed positions"); } - return new IntervalQuery(field, sourceProvider.getSource(fieldType)); + return new IntervalQuery(field, sourceProvider.getSource(context, fieldType)); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 6d9c9edaa09f7..61961f9775516 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -26,11 +26,14 @@ import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; import org.elasticsearch.common.xcontent.ConstructingObjectParser; -import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.ToXContentFragment; import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.mapper.MappedFieldType; import java.io.IOException; @@ -48,12 +51,12 @@ * Built-in sources include {@link Match}, which analyzes a text string and converts it * to a proximity source (phrase, ordered or unordered depending on how * strict the matching should be); {@link Combine}, which allows proximity queries - * between different sub-sources; and {@link Relate}, which allows sources to be filtered + * between different sub-sources; and {@link Filter}, which allows sources to 
be filtered * by their relation to other sources. */ -public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentObject { +public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentFragment { - public abstract IntervalsSource getSource(MappedFieldType fieldType) throws IOException; + public abstract IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException; @Override public abstract int hashCode(); @@ -62,59 +65,65 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont public abstract boolean equals(Object other); public static IntervalsSourceProvider fromXContent(XContentParser parser) throws IOException { - if (parser.currentToken() != XContentParser.Token.START_OBJECT) { - throw new ParsingException(parser.getTokenLocation(), - "Malformed IntervalsSource definition, expected [start_object] but found [" + parser.currentToken() + "]"); - } + assert parser.currentToken() == XContentParser.Token.FIELD_NAME; + switch (parser.currentName()) { + case "match": + return Match.fromXContent(parser); + case "any_of": + return Disjunction.fromXContent(parser); + case "all_of": + return Combine.fromXContent(parser); + } + throw new ParsingException(parser.getTokenLocation(), + "Unknown interval type [" + parser.currentName() + "], expecting one of [match, any_of, all_of]"); + } + + private static IntervalsSourceProvider parseInnerIntervals(XContentParser parser) throws IOException { if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { - throw new ParsingException(parser.getTokenLocation(), - "Malformed IntervalsSource definition, no field after start_object"); - } - String sourceType = parser.currentName(); - if (parser.nextToken() != XContentParser.Token.START_OBJECT) { - throw new ParsingException(parser.getTokenLocation(), - "Malformed IntervalsSource definition, expected [start_object] after source name but found [" - + parser.currentToken() + 
"]"); - } - IntervalsSourceProvider provider = parser.namedObject(IntervalsSourceProvider.class, sourceType, null); - //end_object of the specific query (e.g. match, multi_match etc.) element - if (parser.currentToken() != XContentParser.Token.END_OBJECT) { - throw new ParsingException(parser.getTokenLocation(), - "[" + sourceType + "] malformed source, expected [end_object] but found [" + parser.currentToken() + "]"); - } - //end_object of the query object + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + IntervalsSourceProvider isp = IntervalsSourceProvider.fromXContent(parser); if (parser.nextToken() != XContentParser.Token.END_OBJECT) { - throw new ParsingException(parser.getTokenLocation(), - "[" + sourceType + "] malformed source, expected [end_object] but found [" + parser.currentToken() + "]"); + throw new ParsingException(parser.getTokenLocation(), "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); } - return provider; + return isp; } public static class Match extends IntervalsSourceProvider { public static final String NAME = "match"; - private final String text; - private final int maxWidth; - private final MappedFieldType.IntervalType type; - - public Match(String text, int maxWidth, MappedFieldType.IntervalType type) { - this.text = text; - this.maxWidth = maxWidth; - this.type = type; + private final String query; + private final int maxGaps; + private final boolean ordered; + private final String analyzer; + private final IntervalFilter filter; + + public Match(String query, int maxGaps, boolean ordered, String analyzer, IntervalFilter filter) { + this.query = query; + this.maxGaps = maxGaps; + this.ordered = ordered; + this.analyzer = analyzer; + this.filter = filter; } public Match(StreamInput in) throws IOException { - this.text = in.readString(); - this.maxWidth = in.readInt(); - this.type = in.readEnum(MappedFieldType.IntervalType.class); + this.query = 
in.readString(); + this.maxGaps = in.readVInt(); + this.ordered = in.readBoolean(); + this.analyzer = in.readOptionalString(); + this.filter = in.readOptionalWriteable(IntervalFilter::new); } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { - IntervalsSource source = fieldType.intervals(text, type); - if (maxWidth != Integer.MAX_VALUE) { - return Intervals.maxwidth(maxWidth, source); + public IntervalsSource getSource(QueryShardContext context, MappedFieldType fieldType) throws IOException { + NamedAnalyzer analyzer = null; + if (this.analyzer != null) { + analyzer = context.getMapperService().getIndexAnalyzers().get(this.analyzer); + } + IntervalsSource source = fieldType.intervals(query, maxGaps, ordered, analyzer); + if (filter != null) { + return filter.filter(source, context, fieldType); } return source; } @@ -124,13 +133,16 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; Match match = (Match) o; - return Objects.equals(text, match.text) && Objects.equals(maxWidth, match.maxWidth) - && Objects.equals(type, match.type); + return maxGaps == match.maxGaps && + ordered == match.ordered && + Objects.equals(query, match.query) && + Objects.equals(filter, match.filter) && + Objects.equals(analyzer, match.analyzer); } @Override public int hashCode() { - return Objects.hash(text, maxWidth, type); + return Objects.hash(query, maxGaps, ordered, analyzer, filter); } @Override @@ -140,64 +152,79 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeString(text); - out.writeInt(maxWidth); - out.writeEnum(type); + out.writeString(query); + out.writeVInt(maxGaps); + out.writeBoolean(ordered); + out.writeOptionalString(analyzer); + out.writeOptionalWriteable(filter); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.field(NAME); 
builder.startObject(); - builder.startObject(NAME); - builder.field("text", text); - builder.field("max_width", maxWidth); - builder.field("type", type.toString().toLowerCase(Locale.ROOT)); - return builder.endObject().endObject(); + builder.field("query", query); + builder.field("max_gaps", maxGaps); + builder.field("ordered", ordered); + if (analyzer != null) { + builder.field("analyzer", analyzer); + } + if (filter != null) { + builder.field("filter", filter); + } + return builder.endObject(); } private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, args -> { - String text = (String) args[0]; - int max_width = (args[1] == null ? Integer.MAX_VALUE : (Integer) args[1]); - MappedFieldType.IntervalType type = (args[2] == null - ? MappedFieldType.IntervalType.UNORDERED - : (MappedFieldType.IntervalType) args[2]); - return new Match(text, max_width, type); + String query = (String) args[0]; + int max_gaps = (args[1] == null ? -1 : (Integer) args[1]); + boolean ordered = (args[2] != null && (boolean) args[2]); + String analyzer = (String) args[3]; + IntervalFilter filter = (IntervalFilter) args[4]; + return new Match(query, max_gaps, ordered, analyzer, filter); }); static { - PARSER.declareString(constructorArg(), new ParseField("text")); - PARSER.declareInt(optionalConstructorArg(), new ParseField("max_width")); - PARSER.declareField(optionalConstructorArg(), - (p, c) -> MappedFieldType.IntervalType.valueOf(p.text().toUpperCase(Locale.ROOT)), - new ParseField("type"), ObjectParser.ValueType.STRING); + PARSER.declareString(constructorArg(), new ParseField("query")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps")); + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); + PARSER.declareString(optionalConstructorArg(), new ParseField("analyzer")); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter")); } - public static Match 
fromXContent(XContentParser parser) throws IOException { + public static Match fromXContent(XContentParser parser) { return PARSER.apply(parser, null); } } public static class Disjunction extends IntervalsSourceProvider { - public static final String NAME = "or"; + public static final String NAME = "any_of"; private final List subSources; + private final IntervalFilter filter; - public Disjunction(List subSources) { + public Disjunction(List subSources, IntervalFilter filter) { this.subSources = subSources; + this.filter = filter; } public Disjunction(StreamInput in) throws IOException { this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); + this.filter = in.readOptionalWriteable(IntervalFilter::new); } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldType) throws IOException { List sources = new ArrayList<>(); for (IntervalsSourceProvider provider : subSources) { - sources.add(provider.getSource(fieldType)); + sources.add(provider.getSource(ctx, fieldType)); + } + IntervalsSource source = Intervals.or(sources.toArray(new IntervalsSource[0])); + if (filter == null) { + return source; } - return Intervals.or(sources.toArray(new IntervalsSource[0])); + return filter.filter(source, ctx, fieldType); } @Override @@ -221,18 +248,22 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { out.writeNamedWriteableList(subSources); + out.writeOptionalWriteable(filter); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); builder.startObject(NAME); - builder.startArray("sources"); + builder.startArray("intervals"); for (IntervalsSourceProvider provider : subSources) { + builder.startObject(); provider.toXContent(builder, params); + builder.endObject(); } builder.endArray(); - builder.endObject(); + if 
(filter != null) { + builder.field("filter", filter); + } return builder.endObject(); } @@ -240,10 +271,14 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, args -> { List subSources = (List)args[0]; - return new Disjunction(subSources); + IntervalFilter filter = (IntervalFilter) args[1]; + return new Disjunction(subSources, filter); }); static { - PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("sources")); + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.parseInnerIntervals(p), + new ParseField("intervals")); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), + new ParseField("filter")); } public static Disjunction fromXContent(XContentParser parser) throws IOException { @@ -251,71 +286,40 @@ public static Disjunction fromXContent(XContentParser parser) throws IOException } } - public enum CombineType { - - ORDERED { - @Override - public IntervalsSource source(List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - return Intervals.ordered(subSources.toArray(new IntervalsSource[0])); - } - }, - UNORDERED { - @Override - public IntervalsSource source(List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - return Intervals.unordered(subSources.toArray(new IntervalsSource[0])); - } - }, - BLOCK { - @Override - public IntervalsSource source(List subSources) { - if (subSources.size() == 1) { - return subSources.get(0); - } - return Intervals.phrase(subSources.toArray(new IntervalsSource[0])); - } - }; - - public abstract IntervalsSource source(List subSources); - - } - public static class Combine extends IntervalsSourceProvider { - public static final String NAME = "combine"; + public static final String NAME = "all_of"; private final List subSources; - 
private final CombineType type; - private final int maxWidth; + private final boolean ordered; + private final int maxGaps; + private final IntervalFilter filter; - public Combine(List subSources, CombineType type, int maxWidth) { + public Combine(List subSources, boolean ordered, int maxGaps, IntervalFilter filter) { this.subSources = subSources; - this.type = type; - this.maxWidth = maxWidth; + this.ordered = ordered; + this.maxGaps = maxGaps; + this.filter = filter; } public Combine(StreamInput in) throws IOException { - this.type = in.readEnum(CombineType.class); + this.ordered = in.readBoolean(); this.subSources = in.readNamedWriteableList(IntervalsSourceProvider.class); - this.maxWidth = in.readInt(); + this.maxGaps = in.readInt(); + this.filter = in.readOptionalWriteable(IntervalFilter::new); } @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { + public IntervalsSource getSource(QueryShardContext ctx, MappedFieldType fieldType) throws IOException { List ss = new ArrayList<>(); for (IntervalsSourceProvider provider : subSources) { - ss.add(provider.getSource(fieldType)); + ss.add(provider.getSource(ctx, fieldType)); } - IntervalsSource source = type.source(ss); - if (maxWidth == Integer.MAX_VALUE) { - return source; + IntervalsSource source = IntervalBuilder.combineSources(ss, maxGaps, ordered); + if (filter != null) { + return filter.filter(source, ctx, fieldType); } - return Intervals.maxwidth(maxWidth, source); + return source; } @Override @@ -324,12 +328,12 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; Combine combine = (Combine) o; return Objects.equals(subSources, combine.subSources) && - type == combine.type && maxWidth == combine.maxWidth; + ordered == combine.ordered && maxGaps == combine.maxGaps; } @Override public int hashCode() { - return Objects.hash(subSources, type, maxWidth); + return Objects.hash(subSources, ordered, maxGaps); } @Override @@ -339,38 
+343,45 @@ public String getWriteableName() { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeEnum(type); + out.writeBoolean(ordered); out.writeNamedWriteableList(subSources); - out.writeInt(maxWidth); + out.writeInt(maxGaps); + out.writeOptionalWriteable(filter); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { - builder.startObject(); builder.startObject(NAME); - builder.field("type", type.toString().toLowerCase(Locale.ROOT)); - builder.field("max_width", maxWidth); - builder.startArray("sources"); + builder.field("ordered", ordered); + builder.field("max_gaps", maxGaps); + builder.startArray("intervals"); for (IntervalsSourceProvider provider : subSources) { + builder.startObject(); provider.toXContent(builder, params); + builder.endObject(); } builder.endArray(); - builder.endObject(); + if (filter != null) { + builder.field("filter", filter); + } return builder.endObject(); } @SuppressWarnings("unchecked") static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, args -> { - CombineType type = CombineType.valueOf(((String)args[0]).toUpperCase(Locale.ROOT)); + boolean ordered = (args[0] != null && (boolean) args[0]); List subSources = (List)args[1]; - Integer maxWidth = (args[2] == null ? Integer.MAX_VALUE : (Integer)args[2]); - return new Combine(subSources, type, maxWidth); + Integer maxGaps = (args[2] == null ? 
-1 : (Integer)args[2]); + IntervalFilter filter = (IntervalFilter) args[3]; + return new Combine(subSources, ordered, maxGaps, filter); }); static { - PARSER.declareString(constructorArg(), new ParseField("type")); - PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("sources")); - PARSER.declareInt(optionalConstructorArg(), new ParseField("max_width")); + PARSER.declareBoolean(constructorArg(), new ParseField("ordered")); + PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.parseInnerIntervals(p), + new ParseField("intervals")); + PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps")); + PARSER.declareObject(optionalConstructorArg(), (p, c) -> IntervalFilter.fromXContent(p), new ParseField("filter")); } public static Combine fromXContent(XContentParser parser) { @@ -378,115 +389,91 @@ public static Combine fromXContent(XContentParser parser) { } } - public static class Relate extends IntervalsSourceProvider { - - public static final String NAME = "relate"; - - public enum Relation { - CONTAINING { - @Override - IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { - return Intervals.containing(source, filter); - } - }, NOT_CONTAINING { - @Override - IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { - return Intervals.notContaining(source, filter); - } - }, CONTAINED_BY { - @Override - IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { - return Intervals.containedBy(source, filter); - } - }, NOT_CONTAINED_BY { - @Override - IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { - return Intervals.notContainedBy(source, filter); - } - }, NOT_OVERLAPPING { - @Override - IntervalsSource getSource(IntervalsSource source, IntervalsSource filter) { - return Intervals.nonOverlapping(source, filter); - } - }; - abstract IntervalsSource getSource(IntervalsSource source, 
IntervalsSource filter); - } - - private final IntervalsSourceProvider source; + public static class IntervalFilter implements ToXContent, Writeable { + + public static final String NAME = "filter"; + + private final String type; private final IntervalsSourceProvider filter; - private final Relation relation; - public Relate(IntervalsSourceProvider source, IntervalsSourceProvider filter, Relation relation) { - this.source = source; + public IntervalFilter(IntervalsSourceProvider filter, String type) { this.filter = filter; - this.relation = relation; + this.type = type.toLowerCase(Locale.ROOT); } - public Relate(StreamInput in) throws IOException { - this.source = in.readNamedWriteable(IntervalsSourceProvider.class); + public IntervalFilter(StreamInput in) throws IOException { + this.type = in.readString(); this.filter = in.readNamedWriteable(IntervalsSourceProvider.class); - this.relation = in.readEnum(Relation.class); } - @Override - public IntervalsSource getSource(MappedFieldType fieldType) throws IOException { - IntervalsSource s = source.getSource(fieldType); - IntervalsSource f = filter.getSource(fieldType); - return relation.getSource(s, f); + public IntervalsSource filter(IntervalsSource input, QueryShardContext context, MappedFieldType fieldType) throws IOException { + IntervalsSource filterSource = filter.getSource(context, fieldType); + switch (type) { + case "containing": + return Intervals.containing(input, filterSource); + case "contained_by": + return Intervals.containedBy(input, filterSource); + case "not_containing": + return Intervals.notContaining(input, filterSource); + case "not_contained_by": + return Intervals.notContainedBy(input, filterSource); + case "not_overlapping": + return Intervals.nonOverlapping(input, filterSource); + default: + throw new IllegalArgumentException("Unknown filter type [" + type + "]"); + } } @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return 
false; - Relate relate = (Relate) o; - return Objects.equals(source, relate.source) && - Objects.equals(filter, relate.filter) && - relation == relate.relation; + IntervalFilter that = (IntervalFilter) o; + return Objects.equals(type, that.type) && + Objects.equals(filter, that.filter); } @Override public int hashCode() { - return Objects.hash(source, filter, relation); - } - - @Override - public String getWriteableName() { - return NAME; + return Objects.hash(type, filter); } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeNamedWriteable(source); + out.writeString(type); out.writeNamedWriteable(filter); - out.writeEnum(relation); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); - builder.startObject(NAME); - builder.field("source", source); - builder.field("filter", filter); - builder.field("relation", relation.toString().toLowerCase(Locale.ROOT)); + builder.field(type); + builder.startObject(); + filter.toXContent(builder, params); builder.endObject(); builder.endObject(); return builder; } - static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>(NAME, - args -> { - Relation relation = Relation.valueOf(((String)args[2]).toUpperCase(Locale.ROOT)); - return new Relate((IntervalsSourceProvider)args[0], (IntervalsSourceProvider)args[1], relation); - }); - static { - PARSER.declareObject(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("source")); - PARSER.declareObject(constructorArg(), (p, c) -> IntervalsSourceProvider.fromXContent(p), new ParseField("filter")); - PARSER.declareString(constructorArg(), new ParseField("relation")); - } - - public static Relate fromXContent(XContentParser parser) { - return PARSER.apply(parser, null); + public static IntervalFilter fromXContent(XContentParser parser) throws IOException { + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new 
ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + String type = parser.currentName(); + if (parser.nextToken() != XContentParser.Token.START_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [START_OBJECT] but got [" + parser.currentToken() + "]"); + } + if (parser.nextToken() != XContentParser.Token.FIELD_NAME) { + throw new ParsingException(parser.getTokenLocation(), "Expected [FIELD_NAME] but got [" + parser.currentToken() + "]"); + } + IntervalsSourceProvider intervals = IntervalsSourceProvider.fromXContent(parser); + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + if (parser.nextToken() != XContentParser.Token.END_OBJECT) { + throw new ParsingException(parser.getTokenLocation(), "Expected [END_OBJECT] but got [" + parser.currentToken() + "]"); + } + return new IntervalFilter(intervals, type); } } diff --git a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java index 8c651baf8ba20..c6bca83ccb30b 100644 --- a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java @@ -133,12 +133,6 @@ default List getPipelineAggregations() { default List> getRescorers() { return emptyList(); } - /** - * The new {@link IntervalsSourceProvider}s added by this plugin - */ - default List> getIntervalsSourceProviders() { - return emptyList(); - } /** * Specification of custom {@link ScoreFunction}. 
@@ -247,20 +241,6 @@ public QuerySpec(String name, Writeable.Reader reader, QueryParser parser) } } - /** - * Specification of custom {@link IntervalsSourceProvider} - */ - class IntervalSpec extends SearchExtensionSpec> { - - /** - * Specification of custom {@link IntervalsSourceProvider} - */ - public IntervalSpec(String name, Writeable.Reader reader, CheckedFunction parser) { - super(name, reader, parser); - } - - } - /** * Specification for an {@link Aggregation}. */ diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 81d86e7f6e5f0..2531685b94557 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -155,60 +155,41 @@ import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms; import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalAvg; import org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalCardinality; +import org.elasticsearch.search.aggregations.metrics.ExtendedStatsAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.GeoBoundsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalGeoBounds; import org.elasticsearch.search.aggregations.metrics.GeoCentroidAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.InternalAvg; +import org.elasticsearch.search.aggregations.metrics.InternalCardinality; +import org.elasticsearch.search.aggregations.metrics.InternalExtendedStats; +import org.elasticsearch.search.aggregations.metrics.InternalGeoBounds; import org.elasticsearch.search.aggregations.metrics.InternalGeoCentroid; +import 
org.elasticsearch.search.aggregations.metrics.InternalHDRPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; import org.elasticsearch.search.aggregations.metrics.InternalMax; -import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.InternalMedianAbsoluteDeviation; import org.elasticsearch.search.aggregations.metrics.InternalMin; +import org.elasticsearch.search.aggregations.metrics.InternalScriptedMetric; +import org.elasticsearch.search.aggregations.metrics.InternalStats; +import org.elasticsearch.search.aggregations.metrics.InternalSum; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentileRanks; +import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; +import org.elasticsearch.search.aggregations.metrics.InternalTopHits; +import org.elasticsearch.search.aggregations.metrics.InternalValueCount; +import org.elasticsearch.search.aggregations.metrics.InternalWeightedAvg; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.MinAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentileRanksAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.PercentilesAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentileRanks; -import org.elasticsearch.search.aggregations.metrics.InternalHDRPercentiles; -import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentileRanks; -import org.elasticsearch.search.aggregations.metrics.InternalTDigestPercentiles; -import org.elasticsearch.search.aggregations.metrics.InternalScriptedMetric; import org.elasticsearch.search.aggregations.metrics.ScriptedMetricAggregationBuilder; -import 
org.elasticsearch.search.aggregations.metrics.InternalStats; import org.elasticsearch.search.aggregations.metrics.StatsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.ExtendedStatsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalExtendedStats; -import org.elasticsearch.search.aggregations.metrics.InternalSum; import org.elasticsearch.search.aggregations.metrics.SumAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalTopHits; import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalValueCount; import org.elasticsearch.search.aggregations.metrics.ValueCountAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalWeightedAvg; import org.elasticsearch.search.aggregations.metrics.WeightedAvgAggregationBuilder; -import org.elasticsearch.search.aggregations.metrics.InternalMedianAbsoluteDeviation; -import org.elasticsearch.search.aggregations.metrics.MedianAbsoluteDeviationAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue; -import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalBucketMetricValue; import org.elasticsearch.search.aggregations.pipeline.AvgBucketPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.AvgBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalPercentilesBucket; -import 
org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; -import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketParser; -import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalExtendedStatsBucket; -import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.BucketScriptPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.BucketScriptPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.BucketSelectorPipelineAggregationBuilder; @@ -219,19 +200,38 @@ import org.elasticsearch.search.aggregations.pipeline.CumulativeSumPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.DerivativePipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.DerivativePipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.InternalDerivative; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.EwmaModel; +import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketParser; +import 
org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.ExtendedStatsBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.HoltLinearModel; import org.elasticsearch.search.aggregations.pipeline.HoltWintersModel; +import org.elasticsearch.search.aggregations.pipeline.InternalBucketMetricValue; +import org.elasticsearch.search.aggregations.pipeline.InternalDerivative; +import org.elasticsearch.search.aggregations.pipeline.InternalExtendedStatsBucket; +import org.elasticsearch.search.aggregations.pipeline.InternalPercentilesBucket; +import org.elasticsearch.search.aggregations.pipeline.InternalSimpleValue; +import org.elasticsearch.search.aggregations.pipeline.InternalStatsBucket; import org.elasticsearch.search.aggregations.pipeline.LinearModel; +import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.MaxBucketPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.MinBucketPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.MovAvgModel; -import org.elasticsearch.search.aggregations.pipeline.SimpleModel; +import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.MovFnPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.MovFnPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.PercentilesBucketPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import 
org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregationBuilder; import org.elasticsearch.search.aggregations.pipeline.SerialDiffPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.SimpleModel; +import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.StatsBucketPipelineAggregator; +import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregationBuilder; +import org.elasticsearch.search.aggregations.pipeline.SumBucketPipelineAggregator; import org.elasticsearch.search.fetch.FetchPhase; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.DocValueFieldsFetchSubPhase; @@ -303,7 +303,6 @@ public SearchModule(Settings settings, boolean transportClient, List getNamedWriteables() { @@ -815,16 +815,13 @@ private void registerQueryParsers(List plugins) { registerFromPlugin(plugins, SearchPlugin::getQueries, this::registerQuery); } - private void registerIntervalsSourceProviders(List plugins) { - registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Match.NAME, - IntervalsSourceProvider.Match::new, IntervalsSourceProvider.Match::fromXContent)); - registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Disjunction.NAME, - IntervalsSourceProvider.Disjunction::new, IntervalsSourceProvider.Disjunction::fromXContent)); - registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Combine.NAME, - IntervalsSourceProvider.Combine::new, IntervalsSourceProvider.Combine::fromXContent)); - registerIntervalsSourceProvider(new SearchPlugin.IntervalSpec<>(IntervalsSourceProvider.Relate.NAME, - IntervalsSourceProvider.Relate::new, IntervalsSourceProvider.Relate::fromXContent)); - registerFromPlugin(plugins, SearchPlugin::getIntervalsSourceProviders, this::registerIntervalsSourceProvider); + private 
void registerIntervalsSourceProviders() { + namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Match.NAME, IntervalsSourceProvider.Match::new)); + namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Combine.NAME, IntervalsSourceProvider.Combine::new)); + namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, + IntervalsSourceProvider.Disjunction.NAME, IntervalsSourceProvider.Disjunction::new)); } private void registerQuery(QuerySpec spec) { @@ -833,13 +830,6 @@ private void registerQuery(QuerySpec spec) { (p, c) -> spec.getParser().fromXContent(p))); } - private void registerIntervalsSourceProvider(SearchPlugin.IntervalSpec spec) { - namedWriteables.add(new NamedWriteableRegistry.Entry(IntervalsSourceProvider.class, - spec.getName().getPreferredName(), spec.getReader())); - namedXContents.add(new NamedXContentRegistry.Entry(IntervalsSourceProvider.class, spec.getName(), - (p, c) -> spec.getParser().apply(p))); - } - public FetchPhase getFetchPhase() { return new FetchPhase(fetchSubPhases); } diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java index f7bcd13ce3e7c..04d13609679b6 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -38,7 +38,7 @@ public void testSimpleTerm() throws IOException { CannedTokenStream ts = new CannedTokenStream(new Token("term1", 1, 2)); - IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); IntervalsSource expected = Intervals.term("term1"); assertEquals(expected, source); @@ -52,7 +52,7 @@ public void testOrdered() 
throws IOException { new Token("term3", 5, 6) ); - IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); IntervalsSource expected = Intervals.ordered( Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") ); @@ -69,7 +69,7 @@ public void testUnordered() throws IOException { new Token("term3", 5, 6) ); - IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.UNORDERED); + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, false); IntervalsSource expected = Intervals.unordered( Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") ); @@ -86,7 +86,7 @@ public void testPhrase() throws IOException { new Token("term3", 5, 6) ); - IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.PHRASE); + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), 0, true); IntervalsSource expected = Intervals.phrase( Intervals.term("term1"), Intervals.term("term2"), Intervals.term("term3") ); @@ -104,7 +104,7 @@ public void testSimpleSynonyms() throws IOException { new Token("term3", 5, 6) ); - IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); IntervalsSource expected = Intervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.term("term4")), Intervals.term("term3") ); @@ -128,7 +128,7 @@ public void testGraphSynonyms() throws IOException { new Token("term5", 6, 7) ); - IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), MappedFieldType.IntervalType.ORDERED); + IntervalsSource source = BUILDER.analyzeText(new CachingTokenFilter(ts), -1, true); IntervalsSource expected = 
Intervals.ordered( Intervals.term("term1"), Intervals.or(Intervals.term("term2"), Intervals.phrase("term3", "term4")), diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 399da1a834e99..3ea927de9d677 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.apache.lucene.search.intervals.Intervals; @@ -40,20 +41,32 @@ protected IntervalQueryBuilder doCreateTestQueryBuilder() { return new IntervalQueryBuilder(STRING_FIELD_NAME, createRandomSource()); } + @Override + public void testUnknownField() throws IOException { + super.testUnknownField(); + } + + private static final String[] filters = new String[]{ + "containing", "contained_by", "not_containing", "not_contained_by", "not_overlapping" + }; + + private IntervalsSourceProvider.IntervalFilter createRandomFilter() { + if (randomInt(20) > 18) { + return new IntervalsSourceProvider.IntervalFilter(createRandomSource(), randomFrom(filters)); + } + return null; + } + private IntervalsSourceProvider createRandomSource() { switch (randomInt(20)) { case 0: - IntervalsSourceProvider source1 = createRandomSource(); - IntervalsSourceProvider source2 = createRandomSource(); - int relOrd = randomInt(IntervalsSourceProvider.Relate.Relation.values().length - 1); - return new IntervalsSourceProvider.Relate(source1, source2, IntervalsSourceProvider.Relate.Relation.values()[relOrd]); case 1: int orCount = randomInt(4) + 1; List orSources = new ArrayList<>(); for (int i = 0; i < orCount; i++) { orSources.add(createRandomSource()); } - return new 
IntervalsSourceProvider.Disjunction(orSources); + return new IntervalsSourceProvider.Disjunction(orSources, createRandomFilter()); case 2: case 3: int count = randomInt(5) + 1; @@ -61,9 +74,10 @@ private IntervalsSourceProvider createRandomSource() { for (int i = 0; i < count; i++) { subSources.add(createRandomSource()); } - int typeOrd = randomInt(IntervalsSourceProvider.CombineType.values().length - 1); - int width = randomBoolean() ? Integer.MAX_VALUE : randomIntBetween(count, 100); - return new IntervalsSourceProvider.Combine(subSources, IntervalsSourceProvider.CombineType.values()[typeOrd], width); + boolean ordered = randomBoolean(); + int maxGaps = randomInt(5) - 1; + IntervalsSourceProvider.IntervalFilter filter = createRandomFilter(); + return new IntervalsSourceProvider.Combine(subSources, ordered, maxGaps, filter); default: int wordCount = randomInt(4) + 1; List words = new ArrayList<>(); @@ -71,9 +85,10 @@ private IntervalsSourceProvider createRandomSource() { words.add(randomRealisticUnicodeOfLengthBetween(4, 20)); } String text = String.join(" ", words); - int mtypeOrd = randomInt(MappedFieldType.IntervalType.values().length - 1); - MappedFieldType.IntervalType type = MappedFieldType.IntervalType.values()[mtypeOrd]; - return new IntervalsSourceProvider.Match(text, randomBoolean() ? 
Integer.MAX_VALUE : randomIntBetween(1, 20), type); + boolean mOrdered = randomBoolean(); + int maxMGaps = randomInt(5) - 1; + String analyzer = randomFrom("simple", "keyword", "whitespace"); + return new IntervalsSourceProvider.Match(text, maxMGaps, mOrdered, analyzer, createRandomFilter()); } } @@ -85,159 +100,149 @@ protected void doAssertLuceneQuery(IntervalQueryBuilder queryBuilder, Query quer public void testMatchInterval() throws IOException { String json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + - " \"source\" : { \"match\" : { " + - " \"text\" : \"Hello world\" } } } }"; + "{ \"" + STRING_FIELD_NAME + "\" : { \"match\" : { \"query\" : \"Hello world\" } } } }"; IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); - IntervalQuery expected = new IntervalQuery(STRING_FIELD_NAME, + Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.unordered(Intervals.term("hello"), Intervals.term("world"))); assertEquals(expected, builder.toQuery(createShardContext())); json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + - " \"source\" : { \"match\" : { " + - " \"text\" : \"Hello world\"," + - " \"max_width\" : 40 } } } }"; + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 40 } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.maxwidth(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))); assertEquals(expected, builder.toQuery(createShardContext())); json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\"," + - " \"source\" : { \"match\" : { " + - " \"text\" : \"Hello world\"," + - " \"type\" : \"ordered\" } } } }"; + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"ordered\" : true }," + + " \"boost\" : 2 } } }"; + + builder = 
(IntervalQueryBuilder) parseQuery(json); + expected = new BoostQuery(new IntervalQuery(STRING_FIELD_NAME, + Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))), 2); + assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 10," + + " \"analyzer\" : \"whitespace\"," + + " \"ordered\" : true } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.ordered(Intervals.term("hello"), Intervals.term("world"))); + Intervals.maxwidth(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))); assertEquals(expected, builder.toQuery(createShardContext())); + json = "{ \"intervals\" : " + + "{ \"" + STRING_FIELD_NAME + "\" : { " + + " \"match\" : { " + + " \"query\" : \"Hello world\"," + + " \"max_gaps\" : 10," + + " \"analyzer\" : \"whitespace\"," + + " \"ordered\" : true," + + " \"filter\" : {" + + " \"containing\" : {" + + " \"match\" : { \"query\" : \"blah\" } } } } } } }"; + + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.containing(Intervals.maxwidth(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), + Intervals.term("blah"))); + assertEquals(expected, builder.toQuery(createShardContext())); } public void testOrInterval() throws IOException { - String json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"or\" : {" + - " \"sources\": [" + - " { \"match\" : { \"text\" : \"one\" } }," + - " { \"match\" : { \"text\" : \"two\" } } ] } } } }"; + + String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": {" + + " \"any_of\" : { " + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"one\" } }," + + " { \"match\" : { \"query\" : \"two\" } } ] } } } }"; 
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.or(Intervals.term("one"), Intervals.term("two"))); assertEquals(expected, builder.toQuery(createShardContext())); + + json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": {" + + " \"any_of\" : { " + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"one\" } }," + + " { \"match\" : { \"query\" : \"two\" } } ]," + + " \"filter\" : {" + + " \"not_containing\" : { \"match\" : { \"query\" : \"three\" } } } } } } }"; + builder = (IntervalQueryBuilder) parseQuery(json); + expected = new IntervalQuery(STRING_FIELD_NAME, + Intervals.notContaining( + Intervals.or(Intervals.term("one"), Intervals.term("two")), + Intervals.term("three"))); + assertEquals(expected, builder.toQuery(createShardContext())); } public void testCombineInterval() throws IOException { - String json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"combine\" : {" + - " \"type\" : \"ordered\"," + - " \"sources\" : [" + - " { \"match\" : { \"text\" : \"one\" } }," + - " { \"combine\" : { " + - " \"type\" : \"unordered\"," + - " \"sources\" : [" + - " { \"match\" : { \"text\" : \"two\" } }," + - " { \"match\" : { \"text\" : \"three\" } } ] } } ]," + - " \"max_width\" : 30 } } } }"; + String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": {" + + " \"all_of\" : {" + + " \"ordered\" : true," + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"one\" } }," + + " { \"all_of\" : { " + + " \"ordered\" : false," + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"two\" } }," + + " { \"match\" : { \"query\" : \"three\" } } ] } } ]," + + " \"max_gaps\" : 30," + + " \"filter\" : { " + + " \"contained_by\" : { " + + " \"match\" : { " + + " \"query\" : \"SENTENCE\"," + + " \"analyzer\" : \"keyword\" } } } }," + + " \"boost\" : 1.5 } } }"; IntervalQueryBuilder builder = 
(IntervalQueryBuilder) parseQuery(json); - Query expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.maxwidth(30, Intervals.ordered( - Intervals.term("one"), - Intervals.unordered(Intervals.term("two"), Intervals.term("three"))))); + Query expected = new BoostQuery(new IntervalQuery(STRING_FIELD_NAME, + Intervals.containedBy( + Intervals.maxwidth(30, Intervals.ordered( + Intervals.term("one"), + Intervals.unordered(Intervals.term("two"), Intervals.term("three")))), + Intervals.term("SENTENCE"))), 1.5f); assertEquals(expected, builder.toQuery(createShardContext())); } public void testCombineDisjunctionInterval() throws IOException { String json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"combine\" : {" + - " \"type\" : \"ordered\"," + - " \"sources\" : [" + - " { \"match\" : { \"text\" : \"atmosphere\" } }," + - " { \"or\" : {" + - " \"sources\" : [" + - " { \"match\" : { \"text\" : \"cold\" } }," + - " { \"match\" : { \"text\" : \"outside\" } } ] } } ]," + - " \"max_width\" : 30 } } } }"; + "{ \"" + STRING_FIELD_NAME + "\": { " + + " \"all_of\" : {" + + " \"ordered\" : true," + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"atmosphere\" } }," + + " { \"any_of\" : {" + + " \"intervals\" : [" + + " { \"match\" : { \"query\" : \"cold\" } }," + + " { \"match\" : { \"query\" : \"outside\" } } ] } } ]," + + " \"max_gaps\" : 30," + + " \"filter\" : { " + + " \"not_contained_by\" : { " + + " \"match\" : { \"query\" : \"freeze\" } } } } } } }"; IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); Query expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.maxwidth(30, Intervals.ordered( - Intervals.term("atmosphere"), - Intervals.or(Intervals.term("cold"), Intervals.term("outside")) - ))); - assertEquals(expected, builder.toQuery(createShardContext())); - } - - public void testRelateIntervals() throws IOException { - - String json = "{ \"intervals\" : " + - "{ 
\"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"relate\" : {" + - " \"relation\" : \"containing\"," + - " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + - " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; - IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); - Query expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.containing(Intervals.term("one"), Intervals.term("two"))); - assertEquals(expected, builder.toQuery(createShardContext())); - - json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"relate\" : {" + - " \"relation\" : \"contained_by\"," + - " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + - " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; - builder = (IntervalQueryBuilder) parseQuery(json); - expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.containedBy(Intervals.term("one"), Intervals.term("two"))); - assertEquals(expected, builder.toQuery(createShardContext())); - - json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"relate\" : {" + - " \"relation\" : \"not_containing\"," + - " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + - " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; - builder = (IntervalQueryBuilder) parseQuery(json); - expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.notContaining(Intervals.term("one"), Intervals.term("two"))); - assertEquals(expected, builder.toQuery(createShardContext())); - - json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"relate\" : {" + - " \"relation\" : \"not_contained_by\"," + - " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + - " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; - builder = (IntervalQueryBuilder) parseQuery(json); - expected = new 
IntervalQuery(STRING_FIELD_NAME, - Intervals.notContainedBy(Intervals.term("one"), Intervals.term("two"))); - assertEquals(expected, builder.toQuery(createShardContext())); - - json = "{ \"intervals\" : " + - "{ \"field\" : \"" + STRING_FIELD_NAME + "\", " + - " \"source\" : { " + - " \"relate\" : {" + - " \"relation\" : \"not_overlapping\"," + - " \"source\" : { \"match\" : { \"text\" : \"one\" } }," + - " \"filter\" : { \"match\" : { \"text\" : \"two\" } } } } } }"; - builder = (IntervalQueryBuilder) parseQuery(json); - expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.nonOverlapping(Intervals.term("one"), Intervals.term("two"))); + Intervals.notContainedBy( + Intervals.maxwidth(30, Intervals.ordered( + Intervals.term("atmosphere"), + Intervals.or(Intervals.term("cold"), Intervals.term("outside")) + )), + Intervals.term("freeze"))); assertEquals(expected, builder.toQuery(createShardContext())); } From 3146c47c9f9c70c0d902e09ff8ab37cea18b8194 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Dec 2018 16:08:19 +0000 Subject: [PATCH 20/29] Clint's API --- .../query-dsl/intervals-query.asciidoc | 161 ++++++---- .../test/search/230_interval_query.yml | 291 ++++++++---------- .../index/query/IntervalBuilder.java | 8 +- .../index/query/IntervalQueryBuilder.java | 4 - .../index/query/IntervalsSourceProvider.java | 5 +- .../elasticsearch/plugins/SearchPlugin.java | 7 +- .../query/IntervalQueryBuilderTests.java | 10 +- .../search/query/SearchQueryIT.java | 19 +- 8 files changed, 251 insertions(+), 254 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index ad6ed8b6853d9..8d46668503d45 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -2,10 +2,10 @@ === Intervals query An `intervals` query allows fine-grained control over the order and proximity of -matching terms. 
Matching rules are constructed from a small set of `source` -objects, and the rules are then applied to terms from a particular `field`. +matching terms. Matching rules are constructed from a small set of definitions, +and the rules are then applied to terms from a particular `field`. -The source definitions produce sequences of intervals that span terms in a +The definitions produce sequences of intervals that span terms in a body of text. These intervals can be further combined and filtered by parent sources. @@ -19,25 +19,25 @@ POST _search { "query": { "intervals" : { - "field" : "my_text", - "source" : { - "combine" : { - "type" : "ordered", - "sources" : [ + "my_text" : { + "all_of" : { + "ordered" : true, + "intervals" : [ "match" : { - "text" : "my favourite food", - "type" : "phrase" + "query" : "my favourite food", + "query" : "phrase" }, "or" : { - "sources" : [ - "match" : { "text" : "hot water" }, - "match" : { "text" : "cold porridge" } + "intervals" : [ + "match" : { "query" : "hot water" }, + "match" : { "query" : "cold porridge" } ] } - ] - } + ], + }, + "boost" : 2.0, + "_name" : "favourite_food" } - "boost" : 1.0 } } } @@ -50,75 +50,100 @@ porridge` appear in the correct order, but the text `when it's cold my favourite food is porridge` would not match, because the interval matching `cold porridge` starts before the interval matching `my favourite food`. -==== `match` source +[[intervals-match]] +==== `match` -The `match` source matches analyzed text, and takes the following parameters: +The `match` rule matches analyzed text, and takes the following parameters: [horizontal] -`text`:: -The text to match. It will be analyzed using the search analyzer configured -on the top-level query's field. -`max_width`:: -Specify a maximum distance between the terms in the text. Terms that appear -further apart than this distance will not match. Note that the terms themselves -are included in the width. 
If unspecified then there is no width restriction -on the match. -`type`:: -An optional restriction on how the terms in the text appear in the document. -Can be `phrase` (the terms must appear consecutively and in-order), `ordered` -and `unordered` (the default). - -==== `combine` source - -The `combine` will match subsources that have specific ordering and proximity -relations to each other. When nested, the intervals produced by this source -span all their subsources. +`query`:: +The text to match. +`max_gaps`:: +Specify a maximum number of gaps between the terms in the text. Terms that +appear further apart than this will not match. If unspecified, or set to -1, +then there is no width restriction on the match. If set to 0 then the terms +must appear next to each other. +`ordered`:: +Whether or not the terms must appear in their specified order +`analyzer`:: +Which analyzer should be used to analyze terms in the `query`. By +default, the search analyzer of the top-level field will be used. +`filter`:: +An <> + +[[intervals-all_of]] +==== `all_of` + +`all_of` returns matches that span a combination of other rules. [horizontal] -`sources`:: -An array of sources to combine. All subsources must produce a match in a +`intervals`:: +An array of rules to combine. All rules must produce a match in a document for the overall source to match. -`max_width`:: -Specify a maximum width covered by the matching sources - combinations that -match across a distance greater than this width will not match. Note that the -width of the subsources are included here, so a combination of `black sheep` -and `yes sir` will have a minimum width of `4`. If unspecified then there is -no width restriction on the match. -`type`:: -An optional restriction on how the subsources in the match appear in the -document. Can be `block` (the subsources must appear consecutively and in-order), -`ordered` and `unordered` (the default).
+`max_gaps`:: +Specify a maximum number of gaps between the rules. Combinations that match +across a distance greater than this will not match. If set to -1 or +unspecified, there is no restriction on this distance. If set to 0, then the +matches produced by the rules must all appear immediately next to each other. +`ordered`:: +Whether the intervals produced by the rules should appear in the order in +which they are specified. Defaults to `false` +`filter`:: +An <> -==== `or` source +[[intervals-any_of]] +==== `any_of` source -The `or` source will match any of its nested sub-sources. +The `any_of` rule will match any of its nested sub-rules. [horizontal] -`sources`:: -An array of sources to match +`intervals`:: +An array of rules to match +`filter`:: +An <> -==== `relate` source +[[interval_filter]] +==== filters -The `relate` source will filter a source by its relation to another source. -The resulting intervals are taken directly from the `source`. +You can filter intervals produced by any rules by their relation to the +intervals produced by another rule.
The following example will return +documents that have the words `hot` and `porridge` within 10 positions +of each other, without the word `salty` in between: -[horizontal] -`source`:: -The source to filter -`filter`:: -The source to filter by -`relation`:: -How the filter should be applied +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "match" : { + "query" : "hot porridge", + "max_gaps" : 10, + "filter" : { + "not_containing" : { + "match" : { + "query" : "salty" + } + } + } + } + } + } + } +} +-------------------------------------------------- +// CONSOLE -The following relations are available: +The following filters are available: [horizontal] `containing`:: -Produces intervals that contain an interval from the filter source +Produces intervals that contain an interval from the filter rule `contained_by`:: -Produces intervals that are contained by an interval from the filter source +Produces intervals that are contained by an interval from the filter rule `not_containing`:: -Produces intervals that do not contain an interval from the filter source +Produces intervals that do not contain an interval from the filter rule `not_contained_by`:: -Produces intervals that are not contained by an interval from the filter source +Produces intervals that are not contained by an interval from the filter rule `not_overlapping`:: -Produces intervals that do not overlap with an interval from the filter source \ No newline at end of file +Produces intervals that do not overlap with an interval from the filter rule \ No newline at end of file diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml index db4dd32b789c5..2a25055be32d0 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml +++ 
b/rest-api-spec/src/main/resources/rest-api-spec/test/search/230_interval_query.yml @@ -34,12 +34,11 @@ setup: body: query: intervals: - field: text - source: + text: match: - text: "cold outside" - type: ordered - - match: { hits.total: 2 } + query: "cold outside" + ordered: true + - match: { hits.total.value: 2 } --- "Test default unordered matching": @@ -49,11 +48,10 @@ setup: body: query: intervals: - field: text - source: + text: match: - text: "cold outside" - - match: { hits.total: 3 } + query: "cold outside" + - match: { hits.total.value: 3 } --- "Test explicit unordered matching": @@ -63,12 +61,11 @@ setup: body: query: intervals: - field: text - source: + text: match: - text: "cold outside" - type: "unordered" - - match: { hits.total: 3 } + query: "cold outside" + ordered: false + - match: { hits.total.value: 3 } --- "Test phrase matching": @@ -78,43 +75,41 @@ setup: body: query: intervals: - field: text - source: + text: match: - text: "cold outside" - type: "phrase" - - match: { hits.total: 1 } + query: "cold outside" + ordered: true + max_gaps: 0 + - match: { hits.total.value: 1 } --- -"Test unordered max_width matching": +"Test unordered max_gaps matching": - do: search: index: test body: query: intervals: - field: text - source: + text: match: - text: "cold outside" - max_width: 3 - - match: { hits.total: 2 } + query: "cold outside" + max_gaps: 1 + - match: { hits.total.value: 2 } --- -"Test ordered max_width matching": +"Test ordered max_gaps matching": - do: search: index: test body: query: intervals: - field: text - source: + text: match: - text: "cold outside" - max_width: 2 - type: ordered - - match: { hits.total: 1 } + query: "cold outside" + max_gaps: 0 + ordered: true + - match: { hits.total.value: 1 } --- "Test ordered combination with disjunction": @@ -124,40 +119,38 @@ setup: body: query: intervals: - field: text - source: - combine: - sources: - - or: - sources: + text: + all_of: + intervals: + - any_of: + intervals: - match: - text: 
"cold" + query: "cold" - match: - text: "outside" + query: "outside" - match: - text: "atmosphere" - type: ordered - - match: { hits.total: 1 } + query: "atmosphere" + ordered: true + - match: { hits.total.value: 1 } --- -"Test ordered combination with max_width": +"Test ordered combination with max_gaps": - do: search: index: test body: query: intervals: - field: text - source: - combine: - sources: + text: + all_of: + intervals: - match: - text: "cold" + query: "cold" - match: - text: "outside" - max_width: 2 - type: ordered - - match: { hits.total: 1 } + query: "outside" + max_gaps: 0 + ordered: true + - match: { hits.total.value: 1 } --- "Test ordered combination": @@ -167,16 +160,15 @@ setup: body: query: intervals: - field: text - source: - combine: - sources: + text: + all_of: + intervals: - match: - text: "cold" + query: "cold" - match: - text: "outside" - type: ordered - - match: { hits.total: 2 } + query: "outside" + ordered: true + - match: { hits.total.value: 2 } --- "Test unordered combination": @@ -186,17 +178,16 @@ setup: body: query: intervals: - field: text - source: - combine: - sources: + text: + all_of: + intervals: - match: - text: "cold" + query: "cold" - match: - text: "outside" - max_width: 3 - type: unordered - - match: { hits.total: 2 } + query: "outside" + max_gaps: 1 + ordered: false + - match: { hits.total.value: 2 } --- "Test block combination": @@ -206,16 +197,16 @@ setup: body: query: intervals: - field: text - source: - combine: - sources: + text: + all_of: + intervals: - match: - text: "cold" + query: "cold" - match: - text: "outside" - type: block - - match: { hits.total: 1 } + query: "outside" + ordered: true + max_gaps: 0 + - match: { hits.total.value: 1 } --- @@ -226,22 +217,19 @@ setup: body: query: intervals: - field: text - source: - relate: - source: - combine: - sources: - - match: - text: "cold" - - match: - text: "outside" - type: unordered + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: 
"outside" + ordered: false filter: - match: - text: "is" - relation: containing - - match: { hits.total: 1 } + containing: + match: + query: "is" + - match: { hits.total.value: 1 } --- @@ -252,22 +240,19 @@ setup: body: query: intervals: - field: text - source: - relate: - source: - combine: - sources: - - match: - text: "cold" - - match: - text: "outside" - type: unordered + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: false filter: - match: - text: "is" - relation: not_containing - - match: { hits.total: 2 } + not_containing: + match: + query: "is" + - match: { hits.total.value: 2 } --- "Test contained_by": @@ -277,22 +262,19 @@ setup: body: query: intervals: - field: text - source: - relate: - source: - match: - text: "is" + text: + match: + query: "is" filter: - combine: - sources: - - match: - text: "cold" - - match: - text: "outside" - type: unordered - relation: contained_by - - match: { hits.total: 1 } + contained_by: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: false + - match: { hits.total.value: 1 } --- "Test not_contained_by": @@ -302,22 +284,18 @@ setup: body: query: intervals: - field: text - source: - relate: - source: - match: - text: "it" + text: + match: + query: "it" filter: - combine: - sources: - - match: - text: "cold" - - match: - text: "outside" - type: unordered - relation: not_contained_by - - match: { hits.total: 1 } + not_contained_by: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + - match: { hits.total.value: 1 } --- "Test not_overlapping": @@ -327,26 +305,23 @@ setup: body: query: intervals: - field: text - source: - relate: - source: - combine: - sources: - - match: - text: "cold" - - match: - text: "outside" - type: ordered + text: + all_of: + intervals: + - match: + query: "cold" + - match: + query: "outside" + ordered: true filter: - combine: - sources: - - match: - text: "baby" - - match: - text: 
"there" - type: unordered - relation: not_overlapping - - match: { hits.total: 1 } + not_overlapping: + all_of: + intervals: + - match: + query: "baby" + - match: + query: "there" + ordered: false + - match: { hits.total.value: 1 } diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 02afab2ec53d1..5c7f5f82d5e6e 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -138,8 +138,7 @@ public static IntervalsSource combineSources(List sources, int if (maxGaps == -1) { return inner; } - // norelease - return Intervals.maxwidth(maxGaps, inner); // TODO Change this to maxgaps when lucene snapshot upgraded + return Intervals.maxgaps(maxGaps, inner); } protected List analyzeTerms(TokenStream ts) throws IOException { @@ -235,6 +234,11 @@ public int end() { return NO_MORE_INTERVALS; } + @Override + public int gaps() { + throw new UnsupportedOperationException(); + } + @Override public int nextInterval() { return NO_MORE_INTERVALS; diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index 84fa750c34527..46148dddfebe6 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -22,11 +22,9 @@ import org.apache.lucene.index.IndexOptions; import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; -import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; -import org.elasticsearch.common.xcontent.ConstructingObjectParser; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MappedFieldType; @@ -34,8 +32,6 @@ import java.io.IOException; import java.util.Objects; -import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; - /** * Builder for {@link IntervalQuery} */ diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index 61961f9775516..bb13dee2ec4f4 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -51,8 +51,7 @@ * Built-in sources include {@link Match}, which analyzes a text string and converts it * to a proximity source (phrase, ordered or unordered depending on how * strict the matching should be); {@link Combine}, which allows proximity queries - * between different sub-sources; and {@link Filter}, which allows sources to be filtered - * by their relation to other sources. + * between different sub-sources; and {@link Disjunction}. 
*/ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXContentFragment { @@ -377,7 +376,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return new Combine(subSources, ordered, maxGaps, filter); }); static { - PARSER.declareBoolean(constructorArg(), new ParseField("ordered")); + PARSER.declareBoolean(optionalConstructorArg(), new ParseField("ordered")); PARSER.declareObjectArray(constructorArg(), (p, c) -> IntervalsSourceProvider.parseInnerIntervals(p), new ParseField("intervals")); PARSER.declareInt(optionalConstructorArg(), new ParseField("max_gaps")); diff --git a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java index c6bca83ccb30b..d07467e5d1f8f 100644 --- a/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/SearchPlugin.java @@ -28,7 +28,6 @@ import org.elasticsearch.common.lucene.search.function.ScoreFunction; import org.elasticsearch.common.xcontent.XContent; import org.elasticsearch.common.xcontent.XContentParser; -import org.elasticsearch.index.query.IntervalsSourceProvider; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryParser; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder; @@ -42,13 +41,13 @@ import org.elasticsearch.search.aggregations.bucket.significant.SignificantTerms; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; -import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; -import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; import org.elasticsearch.search.aggregations.pipeline.MovAvgModel; +import org.elasticsearch.search.aggregations.pipeline.MovAvgPipelineAggregator; +import 
org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; -import org.elasticsearch.search.rescore.RescorerBuilder; import org.elasticsearch.search.rescore.Rescorer; +import org.elasticsearch.search.rescore.RescorerBuilder; import org.elasticsearch.search.suggest.Suggest; import org.elasticsearch.search.suggest.Suggester; import org.elasticsearch.search.suggest.SuggestionBuilder; diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 3ea927de9d677..2d849298da9e7 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -116,7 +116,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.maxwidth(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))); + Intervals.maxgaps(40, Intervals.unordered(Intervals.term("hello"), Intervals.term("world")))); assertEquals(expected, builder.toQuery(createShardContext())); json = "{ \"intervals\" : " + @@ -141,7 +141,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery(STRING_FIELD_NAME, - Intervals.maxwidth(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))); + Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world")))); assertEquals(expected, builder.toQuery(createShardContext())); json = "{ \"intervals\" : " + @@ -157,7 +157,7 @@ public void testMatchInterval() throws IOException { builder = (IntervalQueryBuilder) parseQuery(json); expected = new IntervalQuery(STRING_FIELD_NAME, 
- Intervals.containing(Intervals.maxwidth(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), + Intervals.containing(Intervals.maxgaps(10, Intervals.ordered(Intervals.term("Hello"), Intervals.term("world"))), Intervals.term("blah"))); assertEquals(expected, builder.toQuery(createShardContext())); } @@ -211,7 +211,7 @@ public void testCombineInterval() throws IOException { IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); Query expected = new BoostQuery(new IntervalQuery(STRING_FIELD_NAME, Intervals.containedBy( - Intervals.maxwidth(30, Intervals.ordered( + Intervals.maxgaps(30, Intervals.ordered( Intervals.term("one"), Intervals.unordered(Intervals.term("two"), Intervals.term("three")))), Intervals.term("SENTENCE"))), 1.5f); @@ -238,7 +238,7 @@ public void testCombineDisjunctionInterval() throws IOException { IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json); Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.notContainedBy( - Intervals.maxwidth(30, Intervals.ordered( + Intervals.maxgaps(30, Intervals.ordered( Intervals.term("atmosphere"), Intervals.or(Intervals.term("cold"), Intervals.term("outside")) )), diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index d3517b93bfff0..31b3051ae0103 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -1322,17 +1322,16 @@ public void testIntervals() throws InterruptedException { .setSource("description", "it's cold outside, there's no kind of atmosphere")); String json = "{ \"intervals\" : " + - "{ \"field\" : \"description\", " + - " \"source\" : { " + - " \"combine\" : {" + - " \"type\" : \"ordered\"," + - " \"sources\" : [" + - " { \"or\" : {" + + "{ \"description\": { " + + " \"all_of\" : {" + + " \"ordered\" : \"true\"," + + " 
\"intervals\" : [" + + " { \"any_of\" : {" + " \"sources\" : [" + - " { \"match\" : { \"text\" : \"cold\" } }," + - " { \"match\" : { \"text\" : \"outside\" } } ] } }," + - " { \"match\" : { \"text\" : \"atmosphere\" } } ]," + - " \"max_width\" : 30 } } } }"; + " { \"match\" : { \"query\" : \"cold\" } }," + + " { \"match\" : { \"query\" : \"outside\" } } ] } }," + + " { \"match\" : { \"query\" : \"atmosphere\" } } ]," + + " \"gaps\" : 30 } } } }"; SearchResponse response = client().prepareSearch("test").setQuery(wrapperQuery(json)).get(); assertHitCount(response, 1L); } From 3bf1b0d2a2e63dd65cf425c5cb2c92a02d1c9ddc Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Dec 2018 16:08:45 +0000 Subject: [PATCH 21/29] Delete bash script --- rest | 5 ----- 1 file changed, 5 deletions(-) delete mode 100755 rest diff --git a/rest b/rest deleted file mode 100755 index c14ab370c47ae..0000000000000 --- a/rest +++ /dev/null @@ -1,5 +0,0 @@ -#!/bin/bash - -./gradlew :distribution:archives:integ-test-zip:integTest \ - -Dtests.class="org.elasticsearch.test.rest.*Yaml*IT" \ - -Dtests.method="test {p0=$1}" From 2d2df63b349786fe988bc224c24b5fb7fb6d422c Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Dec 2018 16:32:54 +0000 Subject: [PATCH 22/29] doc fixes --- .../query-dsl/intervals-query.asciidoc | 30 +++++++++++-------- 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 8d46668503d45..57685d7174032 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -23,15 +23,20 @@ POST _search "all_of" : { "ordered" : true, "intervals" : [ - "match" : { - "query" : "my favourite food", - "query" : "phrase" + { + "match" : { + "query" : "my favourite food", + "max_gaps" : 0, + "ordered" : true + } }, - "or" : { - "intervals" : [ - "match" : { "query" : "hot water" }, - "match" : { "query" : 
"cold porridge" } - ] + { + "or" : { + "intervals" : [ + { "match" : { "query" : "hot water" } }, + { "match" : { "query" : "cold porridge" } } + ] + } } ], }, @@ -64,12 +69,13 @@ appear further apart than this will not match. If unspecified, or set to -1, then there is no width restriction on the match. If set to 0 then the terms must appear next to each other. `ordered`:: -Whether or not the terms must appear in their specified order +Whether or not the terms must appear in their specified order. Defaults to +`false` `analyzer`:: Which analyzer should be used to analyze terms in the `query`. By default, the search analyzer of the top-level field will be used. `filter`:: -An <> +An optional <> [[intervals-all_of]] ==== `all_of` @@ -89,7 +95,7 @@ matches produced by the rules must all appear immediately next to each other. Whether the intervals produced by the rules should appear in the order in which they are specified. Defaults to `false` `filter`:: -An <> +An optional <> [[intervals-any_of]] ==== `any_of` source @@ -100,7 +106,7 @@ The `or` rule will match any of its nested sub-rules. 
`intervals`:: An array of rules to match `filter`:: -An <> +An optional <> [[interval_filter]] ==== filters From 67bc11abe4d3fc034f2c9924e621034b7257bef8 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Dec 2018 16:48:26 +0000 Subject: [PATCH 23/29] imports --- .../main/java/org/elasticsearch/index/query/IntervalBuilder.java | 1 - .../org/elasticsearch/index/query/IntervalsSourceProvider.java | 1 - .../java/org/elasticsearch/index/query/IntervalBuilderTests.java | 1 - .../org/elasticsearch/index/query/IntervalQueryBuilderTests.java | 1 - 4 files changed, 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 5c7f5f82d5e6e..0fe7d53fe6d96 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -34,7 +34,6 @@ import org.apache.lucene.search.intervals.IntervalsSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings; -import org.elasticsearch.index.mapper.MappedFieldType; import java.io.IOException; import java.util.ArrayList; diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java index bb13dee2ec4f4..79bcbe26fbc04 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalsSourceProvider.java @@ -30,7 +30,6 @@ import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ToXContent; import org.elasticsearch.common.xcontent.ToXContentFragment; -import org.elasticsearch.common.xcontent.ToXContentObject; import org.elasticsearch.common.xcontent.XContentBuilder; import 
org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.analysis.NamedAnalyzer; diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java index 04d13609679b6..a565db41516a9 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalBuilderTests.java @@ -25,7 +25,6 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.search.intervals.Intervals; import org.apache.lucene.search.intervals.IntervalsSource; -import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.test.ESTestCase; import java.io.IOException; diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index 2d849298da9e7..cdb45ceb7af14 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.apache.lucene.search.intervals.Intervals; -import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.AbstractQueryTestCase; From abf75bda3a85d53157a3730bc397684a0a305e57 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Tue, 11 Dec 2018 17:31:04 +0000 Subject: [PATCH 24/29] docs --- docs/reference/query-dsl/intervals-query.asciidoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 57685d7174032..f99aaa85df6f5 100644 --- 
a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -1,4 +1,4 @@ -[[intervals-query]] +[[query-dsl-intervals-query]] === Intervals query An `intervals` query allows fine-grained control over the order and proximity of @@ -31,14 +31,14 @@ POST _search } }, { - "or" : { + "any_of" : { "intervals" : [ { "match" : { "query" : "hot water" } }, { "match" : { "query" : "cold porridge" } } ] } } - ], + ] }, "boost" : 2.0, "_name" : "favourite_food" From 0b14af374a1279f887d1b799bbd269432476361e Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Wed, 12 Dec 2018 09:57:19 +0000 Subject: [PATCH 25/29] test fix --- .../java/org/elasticsearch/search/query/SearchQueryIT.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java index 31b3051ae0103..2ac47882afa83 100644 --- a/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/test/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -1327,11 +1327,11 @@ public void testIntervals() throws InterruptedException { " \"ordered\" : \"true\"," + " \"intervals\" : [" + " { \"any_of\" : {" + - " \"sources\" : [" + + " \"intervals\" : [" + " { \"match\" : { \"query\" : \"cold\" } }," + " { \"match\" : { \"query\" : \"outside\" } } ] } }," + " { \"match\" : { \"query\" : \"atmosphere\" } } ]," + - " \"gaps\" : 30 } } } }"; + " \"max_gaps\" : 30 } } } }"; SearchResponse response = client().prepareSearch("test").setQuery(wrapperQuery(json)).get(); assertHitCount(response, 1L); } From 45bf4992d2b4da98a8d84e81baeaf7e8760ce501 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 13 Dec 2018 10:39:04 +0000 Subject: [PATCH 26/29] feedback --- .../query-dsl/intervals-query.asciidoc | 109 +++++++++++++++++- .../index/mapper/TextFieldMapper.java | 2 +- .../index/query/IntervalBuilder.java | 4 
+- .../index/query/IntervalQueryBuilder.java | 10 +- .../query/IntervalQueryBuilderTests.java | 25 ++-- 5 files changed, 137 insertions(+), 13 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index f99aaa85df6f5..e6a78649f1773 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -5,7 +5,7 @@ An `intervals` query allows fine-grained control over the order and proximity of matching terms. Matching rules are constructed from a small set of definitions, and the rules are then applied to terms from a particular `field`. -The definitions produce sequences of intervals that span terms in a +The definitions produce sequences of minimal intervals that span terms in a body of text. These intervals can be further combined and filtered by parent sources. @@ -152,4 +152,109 @@ Produces intervals that do not contain an interval from the filter rule `not_contained_by`:: Produces intervals that are not contained by an interval from the filter rule `not_overlapping`:: -Produces intervals that do not overlap with an interval from the filter rule \ No newline at end of file +Produces intervals that do not overlap with an interval from the filter rule + +[[interval-minimization]] +==== Minimization + +The intervals query always minimizes intervals, to ensure that queries can +run in linear time. This can sometimes cause surprising results, particularly +when using `max_gaps` restrictions or filters. 
For example, take the +following query, searching for `salty` contained within the phrase `hot +porridge`: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "match" : { + "query" : "salty", + "filter" : { + "contained_by" : { + "match" : { + "query" : "hot porridge" + } + } + } + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +This query will *not* match a document containing the phrase `hot porridge is +salty porridge`, because the intervals returned by the match query for `hot +porridge` only cover the initial two terms in this document, and these do not +overlap the intervals covering `salty`. + +Another restriction to be aware of is the case of `any_of` rules that contain +sub-rules which overlap. In particular, if one of the rules is a strict +prefix of the other, then the longer rule will never be matched, which can +cause surprises when used in combination with `max_gaps`. Consider the +following query, searching for `the` immediately followed by `big` or `big bad`, +immediately followed by `wolf`: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "all_of" : { + "intervals" : [ + { "match" : { "query" : "the" } }, + { "any_of" : { + "intervals" : [ + { "match" : { "query" : "big" } }, + { "match" : { "query" : "big bad" } } + ] } }, + { "match" : { "query" : "wolf" } } + ], + "max_gaps" : 0, + "ordered" : true + } + } + } + } +} +-------------------------------------------------- +// CONSOLE + +Counter-intuitively, this query *will not* match the document `the big bad +wolf`, because the `any_of` rule in the middle will only produce intervals +for `big` - intervals for `big bad` being longer than those for `big`, while +starting at the same position, and so being minimized away.
In these cases, +it's better to rewrite the query so that all of the options are explicitly +laid out at the top level: + +[source,js] +-------------------------------------------------- +POST _search +{ + "query": { + "intervals" : { + "my_text" : { + "any_of" : { + "rules" : [ + { "match" : { + "query" : "the big bad wolf", + "ordered" : true, + "max_gaps" : 0 } }, + { "match" : { + "query" : "the big wolf", + "ordered" : true, + "max_gaps" : 0 } }, + ] + } + } + } + } +} +-------------------------------------------------- +// CONSOLE \ No newline at end of file diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index a5ab1931cdc29..5987e167dc9ab 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -584,7 +584,7 @@ public Query existsQuery(QueryShardContext context) { @Override public IntervalsSource intervals(String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer) throws IOException { if (indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { - throw new IllegalArgumentException("Cannot create source against field [" + name() + "] with no positions indexed"); + throw new IllegalArgumentException("Cannot create intervals against field [" + name() + "] with no positions indexed"); } IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? 
searchAnalyzer() : analyzer); return builder.analyzeText(text, maxGaps, ordered); diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java index 0fe7d53fe6d96..7f42eb137190d 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalBuilder.java @@ -122,7 +122,7 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException { return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())); } - public static IntervalsSource combineSources(List sources, int maxGaps, boolean ordered) { + protected static IntervalsSource combineSources(List sources, int maxGaps, boolean ordered) { if (sources.size() == 0) { return NO_INTERVALS; } @@ -218,7 +218,7 @@ protected List analyzeGraph(TokenStream source) throws IOExcept return clauses; } - public static final IntervalsSource NO_INTERVALS = new IntervalsSource() { + private static final IntervalsSource NO_INTERVALS = new IntervalsSource() { @Override public IntervalIterator intervals(String field, LeafReaderContext ctx) { diff --git a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java index 46148dddfebe6..a1badc38323da 100644 --- a/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java +++ b/server/src/main/java/org/elasticsearch/index/query/IntervalQueryBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.index.IndexOptions; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.elasticsearch.common.ParsingException; @@ -81,6 +82,7 @@ public static IntervalQueryBuilder fromXContent(XContentParser parser) throws IO String name = null; float boost = 1; 
IntervalsSourceProvider provider = null; + String providerName = null; while (parser.nextToken() != XContentParser.Token.END_OBJECT) { if (parser.currentToken() != XContentParser.Token.FIELD_NAME) { throw new ParsingException(parser.getTokenLocation(), @@ -96,6 +98,11 @@ public static IntervalQueryBuilder fromXContent(XContentParser parser) throws IO boost = parser.floatValue(); break; default: + if (providerName != null) { + throw new ParsingException(parser.getTokenLocation(), + "Only one interval rule can be specified, found [" + providerName + "] and [" + parser.currentName() + "]"); + } + providerName = parser.currentName(); provider = IntervalsSourceProvider.fromXContent(parser); } @@ -118,7 +125,8 @@ public static IntervalQueryBuilder fromXContent(XContentParser parser) throws IO protected Query doToQuery(QueryShardContext context) throws IOException { MappedFieldType fieldType = context.fieldMapper(field); if (fieldType == null) { - throw new IllegalArgumentException("Cannot create IntervalQuery over non-existent field [" + field + "]"); + // Be lenient with unmapped fields so that cross-index search will work nicely + return new MatchNoDocsQuery(); } if (fieldType.tokenized() == false || fieldType.indexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { diff --git a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java index cdb45ceb7af14..06ab542ebc092 100644 --- a/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java +++ b/server/src/test/java/org/elasticsearch/index/query/IntervalQueryBuilderTests.java @@ -20,9 +20,11 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.BoostQuery; +import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.intervals.IntervalQuery; import org.apache.lucene.search.intervals.Intervals; 
+import org.elasticsearch.common.ParsingException; import org.elasticsearch.search.internal.SearchContext; import org.elasticsearch.test.AbstractQueryTestCase; @@ -245,15 +247,12 @@ public void testCombineDisjunctionInterval() throws IOException { assertEquals(expected, builder.toQuery(createShardContext())); } - public void testNonIndexedFields() { + public void testNonIndexedFields() throws IOException { IntervalsSourceProvider provider = createRandomSource(); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> { - IntervalQueryBuilder builder = new IntervalQueryBuilder("no_such_field", provider); - builder.doToQuery(createShardContext()); - }); - assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over non-existent field [no_such_field]")); + IntervalQueryBuilder b = new IntervalQueryBuilder("no_such_field", provider); + assertThat(b.toQuery(createShardContext()), equalTo(new MatchNoDocsQuery())); - e = expectThrows(IllegalArgumentException.class, () -> { + Exception e = expectThrows(IllegalArgumentException.class, () -> { IntervalQueryBuilder builder = new IntervalQueryBuilder(INT_FIELD_NAME, provider); builder.doToQuery(createShardContext()); }); @@ -266,4 +265,16 @@ public void testNonIndexedFields() { assertThat(e.getMessage(), equalTo("Cannot create IntervalQuery over field [" + STRING_FIELD_NAME_2 + "] with no indexed positions")); } + + public void testMultipleProviders() { + String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " + + "\"boost\" : 1," + + "\"match\" : { \"query\" : \"term1\" }," + + "\"all_of\" : { \"intervals\" : [ { \"query\" : \"term2\" } ] } }"; + + ParsingException e = expectThrows(ParsingException.class, () -> { + parseQuery(json); + }); + assertThat(e.getMessage(), equalTo("Only one interval rule can be specified, found [match] and [all_of]")); + } } From a33d816ac0bf703ed12bd1cf9d04d7c587de1bd3 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 13 Dec 2018 10:59:09 +0000 
Subject: [PATCH 27/29] comma --- docs/reference/query-dsl/intervals-query.asciidoc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index e6a78649f1773..cf98b6fa52914 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -249,7 +249,7 @@ POST _search { "match" : { "query" : "the big wolf", "ordered" : true, - "max_gaps" : 0 } }, + "max_gaps" : 0 } } ] } } From 9834a06307953561c12e038fb5c0cd9a2fdd86b5 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Thu, 13 Dec 2018 11:34:25 +0000 Subject: [PATCH 28/29] docs fixes --- docs/reference/query-dsl/intervals-query.asciidoc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index cf98b6fa52914..3dd8652f85389 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -207,10 +207,10 @@ POST _search "intervals" : { "my_text" : { "all_of" : { - "rules" : [ + "intervals" : [ { "match" : { "query" : "the" } }, { "any_of" : { - "rules" : [ + "intervals" : [ { "match" : { "query" : "big" } }, { "match" : { "query" : "big bad" } } ] } }, @@ -241,7 +241,7 @@ POST _search "intervals" : { "my_text" : { "any_of" : { - "rules" : [ + "intervals" : [ { "match" : { "query" : "the big bad wolf", "ordered" : true, From a75416532c1682f7e5f5d175e883c3c30a531607 Mon Sep 17 00:00:00 2001 From: Alan Woodward Date: Fri, 14 Dec 2018 11:19:02 +0000 Subject: [PATCH 29/29] Tidy up doc references to old rule --- docs/reference/query-dsl/intervals-query.asciidoc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 3dd8652f85389..790fdf08bfdce 100644 --- 
a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -98,9 +98,9 @@ which they are specified. Defaults to `false` An optional <<interval_filter,interval filter>> [[intervals-any_of]] -==== `any_of` source +==== `any_of` -The `or` rule will match any of its nested sub-rules. +The `any_of` rule emits intervals produced by any of its sub-rules. [horizontal] `intervals`::