Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@

package org.elasticsearch.index.mapper.annotatedtext;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.elasticsearch.index.analysis.AnalyzerScope;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.mapper.ContentPath;
import org.elasticsearch.index.mapper.FieldTypeTestCase;
import org.elasticsearch.index.mapper.MappedFieldType;
Expand All @@ -25,9 +23,8 @@ public class AnnotatedTextFieldTypeTests extends FieldTypeTestCase {

public void testIntervals() throws IOException {
MappedFieldType ft = new AnnotatedTextFieldMapper.AnnotatedTextFieldType("field", Collections.emptyMap());
NamedAnalyzer a = new NamedAnalyzer("name", AnalyzerScope.INDEX, new StandardAnalyzer());
IntervalsSource source = ft.intervals("Donald Trump", 0, true, a, false);
assertEquals(Intervals.phrase(Intervals.term("donald"), Intervals.term("trump")), source);
IntervalsSource source = ft.termIntervals(new BytesRef("donald"), null);
assertEquals(Intervals.term("donald"), source);
}

public void testFetchSourceValue() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
import org.elasticsearch.common.geo.ShapeRelation;
import org.elasticsearch.common.time.DateMathParser;
import org.elasticsearch.common.unit.Fuzziness;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.query.DistanceFeatureQueryBuilder;
import org.elasticsearch.index.query.QueryRewriteContext;
Expand Down Expand Up @@ -279,10 +278,34 @@ public Query distanceFeatureQuery(Object origin, String pivot, SearchExecutionCo
}

/**
* Create an {@link IntervalsSource} to be used for proximity queries
* Create an {@link IntervalsSource} for the given term.
*/
public IntervalsSource intervals(String query, int max_gaps, boolean ordered,
NamedAnalyzer analyzer, boolean prefix) throws IOException {
public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) {
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
+ "] which is of type [" + typeName() + "]");
}

/**
* Create an {@link IntervalsSource} for the given prefix.
* <p>
* This implementation never builds a source: it ignores both arguments and always
* rejects the request, naming the field and its type in the error message.
*
* @param prefix the prefix to match; not used by this implementation
* @param context the search execution context; not used by this implementation
* @return never returns normally
* @throws IllegalArgumentException always, stating that interval queries can only be used on text fields
*/
public IntervalsSource prefixIntervals(BytesRef prefix, SearchExecutionContext context) {
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
+ "] which is of type [" + typeName() + "]");
}

/**
* Create a fuzzy {@link IntervalsSource} for the given term.
* <p>
* This implementation never builds a source: it ignores all arguments and always
* rejects the request, naming the field and its type in the error message.
*
* @param term the term to match approximately; not used by this implementation
* @param maxDistance the maximum edit distance; not used by this implementation
* @param prefixLength the length of the leading part of the term that must match; not used by this implementation
* @param transpositions whether transpositions are allowed; not used by this implementation
* @param context the search execution context; not used by this implementation
* @return never returns normally
* @throws IllegalArgumentException always, stating that interval queries can only be used on text fields
*/
public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLength,
boolean transpositions, SearchExecutionContext context) {
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
+ "] which is of type [" + typeName() + "]");
}

/**
* Create a wildcard {@link IntervalsSource} for the given pattern.
* <p>
* This implementation never builds a source: it ignores both arguments and always
* rejects the request, naming the field and its type in the error message.
*
* @param pattern the wildcard pattern; not used by this implementation
* @param context the search execution context; not used by this implementation
* @return never returns normally
* @throws IllegalArgumentException always, stating that interval queries can only be used on text fields
*/
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
throw new IllegalArgumentException("Can only use interval queries on text fields - not on [" + name
+ "] which is of type [" + typeName() + "]");
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
Expand Down Expand Up @@ -58,7 +59,6 @@
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.elasticsearch.index.mapper.Mapper.TypeParser.ParserContext;
import org.elasticsearch.index.query.IntervalBuilder;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.index.similarity.SimilarityProvider;
import org.elasticsearch.search.aggregations.support.CoreValuesSourceType;
Expand Down Expand Up @@ -676,23 +676,44 @@ public SpanQuery spanPrefixQuery(String value, SpanMultiTermQueryWrapper.SpanRew
}

@Override
public IntervalsSource intervals(String text, int maxGaps, boolean ordered,
NamedAnalyzer analyzer, boolean prefix) throws IOException {
public IntervalsSource termIntervals(BytesRef term, SearchExecutionContext context) {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
if (analyzer == null) {
analyzer = getTextSearchInfo().getSearchAnalyzer();
return Intervals.term(term);
}

@Override
public IntervalsSource prefixIntervals(BytesRef term, SearchExecutionContext context) {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
if (prefix) {
BytesRef normalizedTerm = analyzer.normalize(name(), text);
if (prefixFieldType != null) {
return prefixFieldType.intervals(normalizedTerm);
}
return Intervals.prefix(normalizedTerm);
if (prefixFieldType != null) {
return prefixFieldType.intervals(term);
}
return Intervals.prefix(term);
}

/**
* Create a fuzzy {@link IntervalsSource} for the given term on this field.
* <p>
* The term is used exactly as given; no analysis or normalization happens in this method.
*
* @param term the term to match approximately
* @param maxDistance forwarded to {@link FuzzyQuery} as its maximum edit distance
* @param prefixLength forwarded to {@link FuzzyQuery} as its prefix length
* @param transpositions forwarded to {@link FuzzyQuery} as its transpositions flag
* @param context the search execution context; not used by this implementation
* @return a multi-term intervals source built from the fuzzy query's automata
* @throws IllegalArgumentException if this field's text search info reports that no positions are indexed
*/
@Override
public IntervalsSource fuzzyIntervals(String term, int maxDistance, int prefixLength,
boolean transpositions, SearchExecutionContext context) {
// Intervals are positional, so refuse fields that report no positions.
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
// The FuzzyQuery is only built to obtain its automata; 128 is passed as its maxExpansions argument.
FuzzyQuery fq = new FuzzyQuery(new Term(name(), term),
maxDistance, prefixLength, 128, transpositions);
return Intervals.multiterm(fq.getAutomata(), term);
}

@Override
public IntervalsSource wildcardIntervals(BytesRef pattern, SearchExecutionContext context) {
if (getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + name() + "] with no positions indexed");
}
if (prefixFieldType != null) {
return prefixFieldType.intervals(pattern);
}
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? getTextSearchInfo().getSearchAnalyzer() : analyzer);
return builder.analyzeText(text, maxGaps, ordered);
return Intervals.wildcard(pattern);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
/**
* Constructs an IntervalsSource based on analyzed text
*/
public class IntervalBuilder {
public abstract class IntervalBuilder {

private final String field;
private final Analyzer analyzer;
Expand All @@ -44,6 +44,9 @@ public IntervalBuilder(String field, Analyzer analyzer) {
this.analyzer = analyzer;
}

/**
* Create term intervals for the provided term.
* <p>
* Extension point for subclasses: the analysis methods of this class call it for each
* token they produce, passing a deep copy of the token's bytes.
*
* @param term the bytes of a single analyzed term
* @return the intervals source to use for that term
*/
protected abstract IntervalsSource termIntervals(BytesRef term);

public IntervalsSource analyzeText(String query, int maxGaps, boolean ordered) throws IOException {
try (TokenStream ts = analyzer.tokenStream(field, query);
CachingTokenFilter stream = new CachingTokenFilter(ts)) {
Expand Down Expand Up @@ -109,7 +112,7 @@ protected IntervalsSource analyzeTerm(TokenStream ts) throws IOException {
TermToBytesRefAttribute bytesAtt = ts.addAttribute(TermToBytesRefAttribute.class);
ts.reset();
ts.incrementToken();
return Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
return termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef()));
}

protected static IntervalsSource combineSources(List<IntervalsSource> sources, int maxGaps, boolean ordered) {
Expand Down Expand Up @@ -138,7 +141,7 @@ protected List<IntervalsSource> analyzeTerms(TokenStream ts) throws IOException
while (ts.incrementToken()) {
BytesRef term = bytesAtt.getBytesRef();
int precedingSpaces = posAtt.getPositionIncrement() - 1;
terms.add(extend(Intervals.term(BytesRef.deepCopyOf(term)), precedingSpaces));
terms.add(extend(termIntervals(BytesRef.deepCopyOf(term)), precedingSpaces));
}
ts.end();
return terms;
Expand Down Expand Up @@ -170,7 +173,7 @@ else if (synonyms.size() > 1) {
synonyms.clear();
spaces = posInc - 1;
}
synonyms.add(Intervals.term(BytesRef.deepCopyOf(bytesAtt.getBytesRef())));
synonyms.add(termIntervals(BytesRef.deepCopyOf(bytesAtt.getBytesRef())));
}
if (synonyms.size() == 1) {
terms.add(extend(synonyms.get(0), spaces));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@

package org.elasticsearch.index.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.queries.intervals.FilteredIntervalsSource;
import org.apache.lucene.queries.intervals.IntervalIterator;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParseField;
Expand Down Expand Up @@ -128,23 +126,36 @@ public Match(StreamInput in) throws IOException {
}
}

/**
* Analyzes {@code text} and builds an {@link IntervalsSource} from it, letting the field type
* decide how each individual term is turned into intervals.
*
* @param fieldType the field type whose name is given to the builder and whose
*                  {@code termIntervals} creates the per-term sources
* @param text the text to analyze
* @param maxGaps forwarded unchanged to {@link IntervalBuilder#analyzeText}
* @param ordered forwarded unchanged to {@link IntervalBuilder#analyzeText}
* @param analyzer the analyzer handed to the {@link IntervalBuilder}
* @param context the search execution context passed on to {@code fieldType.termIntervals}
* @return the source produced by {@link IntervalBuilder#analyzeText}
* @throws IOException if {@link IntervalBuilder#analyzeText} throws it
*/
private IntervalsSource intervals(MappedFieldType fieldType, String text, int maxGaps, boolean ordered, NamedAnalyzer analyzer,
SearchExecutionContext context) throws IOException {
// Anonymous builder: per-term interval creation is delegated to the field type.
IntervalBuilder builder = new IntervalBuilder(fieldType.name(), analyzer) {
@Override
protected IntervalsSource termIntervals(BytesRef term) {
return fieldType.termIntervals(term, context);
}
};
return builder.analyzeText(text, maxGaps, ordered);
}

@Override
public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) throws IOException {
NamedAnalyzer analyzer = null;
if (this.analyzer != null) {
analyzer = context.getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source;
if (useField != null) {
fieldType = context.getFieldType(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(query, maxGaps, ordered, analyzer, false));
}
else {
source = fieldType.intervals(query, maxGaps, ordered, analyzer, false);
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
IntervalsSource source = intervals(fieldType, query, maxGaps, ordered, analyzer, context);
if (useField != null) {
source = Intervals.fixField(useField, source);
}
if (filter != null) {
return filter.filter(source, context, fieldType);
source = filter.filter(source, context, fieldType);
}
return source;
}
Expand Down Expand Up @@ -517,14 +528,17 @@ public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType
if (this.analyzer != null) {
analyzer = context.getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source;
if (useField != null) {
fieldType = context.getFieldType(useField);
assert fieldType != null;
source = Intervals.fixField(useField, fieldType.intervals(prefix, 0, false, analyzer, true));
}
else {
source = fieldType.intervals(prefix, 0, false, analyzer, true);
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
final BytesRef prefixTerm = analyzer.normalize(fieldType.name(), prefix);
IntervalsSource source = fieldType.prefixIntervals(prefixTerm, context);
if (useField != null) {
source = Intervals.fixField(useField, source);
}
return source;
}
Expand Down Expand Up @@ -628,33 +642,23 @@ public Wildcard(StreamInput in) throws IOException {

@Override
public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) {
NamedAnalyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
NamedAnalyzer analyzer = null;
if (this.analyzer != null) {
analyzer = context.getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source;
if (useField != null) {
fieldType = context.getFieldType(useField);
assert fieldType != null;
checkPositions(fieldType);
if (this.analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm));
}
else {
checkPositions(fieldType);
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
source = Intervals.wildcard(normalizedTerm);
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
return source;
}

private void checkPositions(MappedFieldType type) {
if (type.getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed");
BytesRef normalizedPattern = analyzer.normalize(fieldType.name(), pattern);
IntervalsSource source = fieldType.wildcardIntervals(normalizedPattern, context);
if (useField != null) {
source = Intervals.fixField(useField, source);
}
return source;
}

@Override
Expand Down Expand Up @@ -765,36 +769,27 @@ public Fuzzy(StreamInput in) throws IOException {

@Override
public IntervalsSource getSource(SearchExecutionContext context, MappedFieldType fieldType) {
NamedAnalyzer analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
NamedAnalyzer analyzer = null;
if (this.analyzer != null) {
analyzer = context.getIndexAnalyzers().get(this.analyzer);
}
IntervalsSource source;
if (useField != null) {
fieldType = context.getFieldType(useField);
assert fieldType != null;
checkPositions(fieldType);
if (this.analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
}
checkPositions(fieldType);
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), term);
FuzzyQuery fq = new FuzzyQuery(new Term(fieldType.name(), normalizedTerm),
fuzziness.asDistance(term), prefixLength, 128, transpositions);
source = Intervals.multiterm(fq.getAutomata(), term);
if (analyzer == null) {
analyzer = fieldType.getTextSearchInfo().getSearchAnalyzer();
}
// Fuzzy queries only work with unicode content so it's legal to call utf8ToString here.
String normalizedTerm = analyzer.normalize(fieldType.name(), term).utf8ToString();
IntervalsSource source = fieldType.fuzzyIntervals(normalizedTerm, fuzziness.asDistance(term),
prefixLength, transpositions, context);
if (useField != null) {
source = Intervals.fixField(useField, source);
}
return source;
}

private void checkPositions(MappedFieldType type) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we change the check so that it still throws a comprehensible exception when positions are not available on the field?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check has moved to TextFieldType so that things still work with MatchOnlyTextFieldMapper, which doesn't index positions but computes them on the fly. Does that address your question?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah right, I skipped that part. Thanks, that addresses my question.

if (type.getTextSearchInfo().hasPositions() == false) {
throw new IllegalArgumentException("Cannot create intervals over field [" + type.name() + "] with no positions indexed");
}
}

@Override
public void extractFields(Set<String> fields) {
if (useField != null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,21 @@
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queries.intervals.Intervals;
import org.apache.lucene.queries.intervals.IntervalsSource;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.test.ESTestCase;

import java.io.IOException;

public class IntervalBuilderTests extends ESTestCase {

private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer());
// Shared test fixture: a builder over "field1" using a StandardAnalyzer, whose
// termIntervals hook maps each analyzed term straight to Intervals.term.
private static final IntervalBuilder BUILDER = new IntervalBuilder("field1", new StandardAnalyzer()) {

@Override
protected IntervalsSource termIntervals(BytesRef term) {
return Intervals.term(term);
}

};

public void testSimpleTerm() throws IOException {

Expand Down
Loading