Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions docs/changelog/96068.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 96068
summary: Use the Weight#matches mode for highlighting by default
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ public void testPhrasePrefix() throws IOException {
.highlighter(highlight().field("field0").order("score").preTags("<x>").postTags("</x>"));

searchResponse = client().search(new SearchRequest("first_test_index").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The <x>quick brown</x> fox jumps over the lazy dog"));

logger.info("--> highlighting and searching on field1");
source = searchSource().query(
Expand Down Expand Up @@ -293,8 +293,8 @@ public void testPhrasePrefix() throws IOException {
0,
1,
anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
equalTo("The <x>quick browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick brown</x> fox jumps over the lazy dog")
)
);
assertHighlight(
Expand All @@ -304,8 +304,8 @@ public void testPhrasePrefix() throws IOException {
0,
1,
anyOf(
equalTo("The <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
equalTo("The <x>quick browse</x> button is a fancy thing, right bro?"),
equalTo("The <x>quick brown</x> fox jumps over the lazy dog")
)
);

Expand Down Expand Up @@ -343,7 +343,7 @@ public void testPhrasePrefix() throws IOException {

searchResponse = client().search(new SearchRequest("second_test_index").source(source)).actionGet();

assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog"));
assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The <x>quick brown</x> fox jumps over the lazy dog"));

logger.info("--> highlighting and searching on field4");
source = searchSource().postFilter(termQuery("type", "type2"))
Expand All @@ -358,8 +358,8 @@ public void testPhrasePrefix() throws IOException {
0,
1,
anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
equalTo("<x>The quick browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The quick brown</x> fox jumps over the lazy dog")
)
);
assertHighlight(
Expand All @@ -369,8 +369,8 @@ public void testPhrasePrefix() throws IOException {
0,
1,
anyOf(
equalTo("<x>The</x> <x>quick</x> <x>browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The</x> <x>quick</x> <x>brown</x> fox jumps over the lazy dog")
equalTo("<x>The quick browse</x> button is a fancy thing, right bro?"),
equalTo("<x>The quick brown</x> fox jumps over the lazy dog")
)
);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,14 @@
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter;
import org.elasticsearch.search.fetch.FetchSubPhase.HitContext;
import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter;
import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext;
import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

public class AnnotatedTextHighlighter extends UnifiedHighlighter {
public class AnnotatedTextHighlighter extends DefaultHighlighter {

public static final String NAME = "annotated";

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,21 +131,23 @@ private void assertHighlightOneDoc(
TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER);
assertThat(topDocs.totalHits.value, equalTo(1L));
String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR));
UnifiedHighlighter.Builder builder = UnifiedHighlighter.builder(searcher, hiliteAnalyzer);
builder.withBreakIterator(() -> breakIterator);
builder.withFieldMatcher(name -> "text".equals(name));
builder.withFormatter(passageFormatter);
CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(
searcher,
hiliteAnalyzer,
builder,
UnifiedHighlighter.OffsetSource.ANALYSIS,
passageFormatter,
locale,
breakIterator,
"index",
"text",
query,
noMatchSize,
expectedPassages.length,
name -> "text".equals(name),
maxAnalyzedOffset,
queryMaxAnalyzedOffset
queryMaxAnalyzedOffset,
true,
true
);
highlighter.setFieldMatcher((name) -> "text".equals(name));
final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue);
Expand Down Expand Up @@ -236,7 +238,7 @@ public void testAnnotatedTextSingleFieldWithBreakIterator() throws Exception {

public void testAnnotatedTextSingleFieldWithPhraseQuery() throws Exception {
final String[] markedUpInputs = { "[Donald Trump](Donald+Trump) visited Singapore", "Donald Jr was with Melania Trump" };
String[] expectedPassages = { "[Donald](_hit_term=donald) [Trump](_hit_term=trump) visited Singapore" };
String[] expectedPassages = { "[Donald Trump](_hit_term=donald+trump&Donald+Trump) visited Singapore" };
Query query = new PhraseQuery("text", "donald", "trump");
BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR);
assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,18 @@ setup:
- do:
indices.refresh: {}

---
teardown:
- skip:
version: " - 8.9.99"
reason: "setting added in 8.10.0"

- do:
indices.put_settings:
index: test
body:
index.highlight.weight_matches_mode.enabled: null

---
"Basic multi_match query":
- do:
Expand All @@ -49,3 +61,35 @@ setup:
- match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
- match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}
- match: {hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is <em>brown</em>."}

---
"Disable weight matches":
- skip:
version: " - 8.9.99"
reason: "support for weight matches was added in 8.10"

- do:
search:
body: {
"query": { "multi_match": { "query": "quick brown fox", "type": "phrase", "fields": [ "text*" ] } },
"highlight": { "type": "unified", "fields": { "*": { } } } }

- match: { hits.hits.0.highlight.text.0: "The <em>quick brown fox</em> is brown." }
- match: { hits.hits.0.highlight.text\.fvh.0: "The <em>quick brown fox</em> is brown." }
- match: { hits.hits.0.highlight.text\.postings.0: "The <em>quick brown fox</em> is brown." }

- do:
indices.put_settings:
index: test
body:
index.highlight.weight_matches_mode.enabled: "false"

- do:
search:
body: {
"query" : { "multi_match" : { "query" : "quick brown fox", "type": "phrase", "fields" : [ "text*"] } },
"highlight" : { "type" : "unified", "fields" : { "*" : {} } } }

- match: {hits.hits.0.highlight.text.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown."}
- match: {hits.hits.0.highlight.text\.fvh.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown."}
- match: {hits.hits.0.highlight.text\.postings.0: "The <em>quick</em> <em>brown</em> <em>fox</em> is brown."}
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ text single unified from reanalysis:
fields:
foo.analyze:
type: unified
- match: { hits.hits.0.highlight.foo\.analyze.0: <em>the</em> <em>quick</em> <em>brown</em> <em>fox</em> <em>jumped</em> <em>over</em> <em>the</em> <em>lazy</em> <em>dog</em> }
- match: { hits.hits.0.highlight.foo\.analyze.0: <em>the quick brown fox jumped over the lazy dog</em> }

---
text multi unified from reanalysis:
Expand All @@ -225,7 +225,7 @@ text multi unified from reanalysis:
fields:
foo.analyze:
type: unified
- match: { hits.hits.0.highlight.foo\.analyze.0: <em>That</em> <em>makes</em> <em>calamity</em> <em>of</em> <em>so</em> <em>long</em> <em>life</em>. }
- match: { hits.hits.0.highlight.foo\.analyze.0: <em>That makes calamity of so long life</em>. }

---
text single unified from positions:
Expand All @@ -240,7 +240,7 @@ text single unified from positions:
fields:
foo.positions:
type: unified
- match: { hits.hits.0.highlight.foo\.positions.0: <em>the</em> <em>quick</em> <em>brown</em> <em>fox</em> <em>jumped</em> <em>over</em> <em>the</em> <em>lazy</em> <em>dog</em> }
- match: { hits.hits.0.highlight.foo\.positions.0: <em>the quick brown fox jumped over the lazy dog</em> }

---
text multi unified from positions:
Expand All @@ -255,7 +255,7 @@ text multi unified from positions:
fields:
foo.positions:
type: unified
- match: { hits.hits.0.highlight.foo\.positions.0: <em>That</em> <em>makes</em> <em>calamity</em> <em>of</em> <em>so</em> <em>long</em> <em>life</em>. }
- match: { hits.hits.0.highlight.foo\.positions.0: <em>That makes calamity of so long life</em>. }

---
text single unified from vectors:
Expand All @@ -270,7 +270,7 @@ text single unified from vectors:
fields:
foo.vectors:
type: unified
- match: { hits.hits.0.highlight.foo\.vectors.0: <em>the</em> <em>quick</em> <em>brown</em> <em>fox</em> <em>jumped</em> <em>over</em> <em>the</em> <em>lazy</em> <em>dog</em> }
- match: { hits.hits.0.highlight.foo\.vectors.0: <em>the quick brown fox jumped over the lazy dog</em> }

---
text multi unified from vectors:
Expand All @@ -285,7 +285,7 @@ text multi unified from vectors:
fields:
foo.vectors:
type: unified
- match: { hits.hits.0.highlight.foo\.vectors.0: <em>That</em> <em>makes</em> <em>calamity</em> <em>of</em> <em>so</em> <em>long</em> <em>life</em>. }
- match: { hits.hits.0.highlight.foo\.vectors.0: <em>That makes calamity of so long life</em>. }

---
text single fvh source order:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2217,14 +2217,7 @@ public void testPostingsHighlighter() throws Exception {

searchResponse = client().search(new SearchRequest("test").source(source)).actionGet();

assertHighlight(
searchResponse,
0,
"field2",
0,
1,
equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy quick dog")
);
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick brown</xxx> fox jumps over the lazy quick dog"));

// lets fall back to the standard highlighter then, what people would do to highlight query matches
logger.info("--> searching on field2, highlighting on field2, falling back to the plain highlighter");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,12 @@
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.automaton.CompiledAutomaton;

import java.io.IOException;
import java.util.ArrayList;
Expand Down Expand Up @@ -295,20 +297,12 @@ public void visit(QueryVisitor visitor) {
shouldVisitor.consumeTerms(this, termArrays.get(i));
}
}
/* We don't report automata here because this breaks the unified highlighter,
which extracts automata separately from phrases. MPPQ gets rewritten to a
SpanMTQQuery by the PhraseHelper in any case, so highlighting is taken
care of there instead. If we extract automata here then the trailing prefix
word will be highlighted wherever it appears in the document, instead of only
as part of a phrase. This can be re-instated once we switch to using Matches
to highlight.
for (Term prefixTerm : termArrays.get(termArrays.size() - 1)) {
visitor.consumeTermsMatching(this, field, () -> {
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefixTerm.bytes()));
return ca.runAutomaton;
});
}
*/
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings {
IndexSettings.MAX_SHINGLE_DIFF_SETTING,
IndexSettings.MAX_RESCORE_WINDOW_SETTING,
IndexSettings.MAX_ANALYZED_OFFSET_SETTING,
IndexSettings.WEIGHT_MATCHES_MODE_ENABLED_SETTING,
IndexSettings.MAX_TERMS_COUNT_SETTING,
IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING,
IndexSettings.DEFAULT_FIELD_SETTING,
Expand Down
23 changes: 23 additions & 0 deletions server/src/main/java/org/elasticsearch/index/IndexSettings.java
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.util.Strings;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.search.uhighlight.UnifiedHighlighter;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.routing.IndexRouting;
Expand Down Expand Up @@ -187,6 +188,17 @@ public final class IndexSettings {
Property.IndexScope
);

/**
* Index setting to enable/disable the {@link UnifiedHighlighter.HighlightFlag#WEIGHT_MATCHES}
* mode of the unified highlighter.
*/
public static final Setting<Boolean> WEIGHT_MATCHES_MODE_ENABLED_SETTING = Setting.boolSetting(
"index.highlight.weight_matches_mode.enabled",
true,
Property.Dynamic,
Property.IndexScope
);

/**
* Index setting describing the maximum number of terms that can be used in Terms Query.
* The default maximum of 65536 terms is defensive, as extra processing and memory is involved
Expand Down Expand Up @@ -730,6 +742,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) {
private volatile int maxShingleDiff;
private volatile TimeValue searchIdleAfter;
private volatile int maxAnalyzedOffset;
private volatile boolean weightMatchesEnabled;
private volatile int maxTermsCount;
private volatile String defaultPipeline;
private volatile String requiredPipeline;
Expand Down Expand Up @@ -871,6 +884,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD);
maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL);
maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING);
weightMatchesEnabled = scopedSettings.get(WEIGHT_MATCHES_MODE_ENABLED_SETTING);
maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING);
maxRegexLength = scopedSettings.get(MAX_REGEX_LENGTH_SETTING);
this.mergePolicyConfig = new MergePolicyConfig(logger, this);
Expand Down Expand Up @@ -946,6 +960,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti
scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval);
scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners);
scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset);
scopedSettings.addSettingsUpdateConsumer(WEIGHT_MATCHES_MODE_ENABLED_SETTING, this::setWeightMatchesEnabled);
scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount);
scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll);
scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields);
Expand Down Expand Up @@ -1325,6 +1340,14 @@ private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) {
this.maxAnalyzedOffset = maxAnalyzedOffset;
}

public boolean isWeightMatchesEnabled() {
return this.weightMatchesEnabled;
}

private void setWeightMatchesEnabled(boolean value) {
this.weightMatchesEnabled = value;
}

/**
* Returns the maximum number of terms that can be used in a Terms Query request
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -131,15 +131,16 @@ public int preceding(int offset) {
}

/**
* Can be invoked only after a call to preceding(offset+1).
* Can be invoked only after a call to preceding().
*
* See {@link FieldHighlighter} for usage.
*/
@Override
public int following(int offset) {
if (offset != lastPrecedingOffset || innerEnd == -1) {
throw new IllegalArgumentException("offset != lastPrecedingOffset: " + "usage doesn't look like UnifiedHighlighter");
if (innerEnd == -1) {
throw new IllegalArgumentException("preceding should be called first, usage doesn't look like UnifiedHighlighter");
}
return innerEnd;
return Math.max(offset, innerEnd);
}

/**
Expand Down
Loading