From da7a899ee24d120420bd59bbb0e2a5b15cb9a263 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 12 May 2023 16:07:24 +0100 Subject: [PATCH 01/15] Use the Weight#matches API for highlighting by default This PR adapts the unified highlighter to use the Weight#matches API by default when possible. This is the default mode in Lucene for some time now. For cases where the matches API won't work (nested and parent-child queries), the matches mode is disabled automatically. I didn't expose an option to explicitly disable this mode because that should be seen as an internal implementation detail. With this change, matches that span multiple terms are highlighted together (something that users asked for years) and the clauses that don't match the document are ignored. --- .../common/HighlighterWithAnalyzersTests.java | 20 +-- .../join/query/ChildQuerySearchIT.java | 2 +- .../AnnotatedTextHighlighter.java | 4 +- .../AnnotatedTextHighlighterTests.java | 12 +- .../highlight/HighlighterSearchIT.java | 9 +- .../lucene/search/MultiPhrasePrefixQuery.java | 10 +- .../BoundedBreakIteratorScanner.java | 9 +- .../uhighlight/CustomFieldHighlighter.java | 81 +---------- .../uhighlight/CustomUnifiedHighlighter.java | 137 ++++++++---------- .../elasticsearch/search/SearchModule.java | 4 +- ...ghlighter.java => DefaultHighlighter.java} | 34 +++-- .../subphase/highlight/PlainHighlighter.java | 2 +- .../CustomUnifiedHighlighterTests.java | 22 +-- .../search/SearchModuleTests.java | 4 +- .../search/fetch/HighlighterTestCase.java | 4 +- 15 files changed, 128 insertions(+), 226 deletions(-) rename server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/{UnifiedHighlighter.java => DefaultHighlighter.java} (91%) diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java index 39284978dbeec..d7e4dbf4c0c01 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/HighlighterWithAnalyzersTests.java @@ -258,7 +258,7 @@ public void testPhrasePrefix() throws IOException { .highlighter(highlight().field("field0").order("score").preTags("").postTags("")); searchResponse = client().search(new SearchRequest("first_test_index").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field0", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); logger.info("--> highlighting and searching on field1"); source = searchSource().query( @@ -293,8 +293,8 @@ public void testPhrasePrefix() throws IOException { 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog") + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog") ) ); assertHighlight( @@ -304,8 +304,8 @@ public void testPhrasePrefix() throws IOException { 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog") + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog") ) ); @@ -343,7 +343,7 @@ public void testPhrasePrefix() throws IOException { searchResponse = client().search(new SearchRequest("second_test_index").source(source)).actionGet(); - assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); + assertHighlight(searchResponse, 0, "field3", 0, 1, equalTo("The quick brown fox jumps over the lazy dog")); logger.info("--> highlighting and searching on field4"); source = searchSource().postFilter(termQuery("type", "type2")) @@ -358,8 +358,8 @@ public void testPhrasePrefix() throws IOException { 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog") + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog") ) ); assertHighlight( @@ -369,8 +369,8 @@ public void testPhrasePrefix() throws IOException { 0, 1, anyOf( - equalTo("The quick browse button is a fancy thing, right bro?"), - equalTo("The quick brown fox jumps over the lazy dog") + equalTo("The quick browse button is a fancy thing, right bro?"), + equalTo("The quick brown fox jumps over the lazy dog") ) ); diff --git a/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java b/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java index 2821c38a6b248..c1ec58939e800 100644 --- a/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java +++ b/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java @@ -1743,7 +1743,7 @@ public void testHighlightersIgnoreParentChild() throws IOException { createIndexRequest("test", "child-type", "child-id", "parent-id", "searchText", "quick brown fox").get(); refresh(); - String[] highlightTypes = new String[] { "plain", "fvh", "unified" }; + String[] highlightTypes = new String[] { "unified" }; for (String highlightType : highlightTypes) { logger.info("Testing with highlight type [{}]", highlightType); SearchResponse searchResponse = client().prepareSearch("test") diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java index 4a9cc95839fd1..768d907833380 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java @@ -18,13 +18,13 @@ import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; -import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import java.io.IOException; import java.util.ArrayList; import java.util.List; -public class AnnotatedTextHighlighter extends UnifiedHighlighter { +public class AnnotatedTextHighlighter extends DefaultHighlighter { public static final String NAME = "annotated"; diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java index f73bb7cbacbdc..3e2891f1594c7 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java @@ -131,19 +131,19 @@ private void assertHighlightOneDoc( TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER); assertThat(topDocs.totalHits.value, equalTo(1L)); String rawValue = Strings.collectionToDelimitedString(plainTextForHighlighter, String.valueOf(MULTIVAL_SEP_CHAR)); + UnifiedHighlighter.Builder builder = UnifiedHighlighter.builder(searcher, hiliteAnalyzer); + builder.withBreakIterator(() -> breakIterator); + builder.withFieldMatcher(name -> "text".equals(name)); + builder.withFormatter(passageFormatter); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter( - searcher, - hiliteAnalyzer, + builder, UnifiedHighlighter.OffsetSource.ANALYSIS, - passageFormatter, locale, - breakIterator, "index", "text", query, noMatchSize, expectedPassages.length, - name -> "text".equals(name), maxAnalyzedOffset, queryMaxAnalyzedOffset ); @@ -236,7 +236,7 @@ public void testAnnotatedTextSingleFieldWithBreakIterator() throws Exception { public void testAnnotatedTextSingleFieldWithPhraseQuery() throws Exception { final String[] markedUpInputs = { "[Donald Trump](Donald+Trump) visited Singapore", "Donald Jr was with Melania Trump" }; - String[] expectedPassages = { "[Donald](_hit_term=donald) [Trump](_hit_term=trump) visited Singapore" }; + String[] expectedPassages = { "[Donald Trump](_hit_term=donald+trump&Donald+Trump) visited Singapore" }; Query query = new PhraseQuery("text", "donald", "trump"); BreakIterator breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR); assertHighlightOneDoc("text", markedUpInputs, query, Locale.ROOT, breakIterator, 0, expectedPassages); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index cf345e9afdd91..267c546f5971f 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -2217,14 +2217,7 @@ public void testPostingsHighlighter() throws Exception { searchResponse = client().search(new SearchRequest("test").source(source)).actionGet(); - assertHighlight( - searchResponse, - 0, - "field2", - 0, - 1, - equalTo("The quick brown fox jumps over the lazy quick dog") - ); + assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The quick brown fox jumps over the lazy quick dog")); // lets fall back to the standard highlighter then, what people would do to highlight query matches logger.info("--> searching on field2, highlighting on field2, falling back to the plain highlighter"); diff --git a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index 376ed6b2626b5..faf73e6f7349e 100644 --- a/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/server/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -16,10 +16,12 @@ import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.MatchNoDocsQuery; import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryVisitor; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.automaton.CompiledAutomaton; import java.io.IOException; import java.util.ArrayList; @@ -294,20 +296,12 @@ public void visit(QueryVisitor visitor) { shouldVisitor.consumeTerms(this, termArrays.get(i)); } } - /* We don't report automata here because this breaks the unified highlighter, - which extracts automata separately from phrases. MPPQ gets rewritten to a - SpanMTQQuery by the PhraseHelper in any case, so highlighting is taken - care of there instead. If we extract automata here then the trailing prefix - word will be highlighted wherever it appears in the document, instead of only - as part of a phrase. This can be re-instated once we switch to using Matches - to highlight. for (Term prefixTerm : termArrays.get(termArrays.size() - 1)) { visitor.consumeTermsMatching(this, field, () -> { CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefixTerm.bytes())); return ca.runAutomaton; }); } - */ } } } diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/BoundedBreakIteratorScanner.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/BoundedBreakIteratorScanner.java index 8394628746392..626820ce89906 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/BoundedBreakIteratorScanner.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/BoundedBreakIteratorScanner.java @@ -131,15 +131,16 @@ public int preceding(int offset) { } /** - * Can be invoked only after a call to preceding(offset+1). + * Can be invoked only after a call to preceding(). + * * See {@link FieldHighlighter} for usage. */ @Override public int following(int offset) { - if (offset != lastPrecedingOffset || innerEnd == -1) { - throw new IllegalArgumentException("offset != lastPrecedingOffset: " + "usage doesn't look like UnifiedHighlighter"); + if (innerEnd == -1) { + throw new IllegalArgumentException("preceding should be called first, usage doesn't look like UnifiedHighlighter"); } - return innerEnd; + return Math.max(offset, innerEnd); } /** diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java index cd781829dd08c..3d8d6c41183ee 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomFieldHighlighter.java @@ -15,14 +15,10 @@ import org.apache.lucene.search.uhighlight.Passage; import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.PassageScorer; -import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.text.BreakIterator; -import java.util.Arrays; -import java.util.Comparator; import java.util.Locale; -import java.util.PriorityQueue; import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; @@ -102,86 +98,11 @@ protected Passage[] getSummaryPassagesNoHighlight(int maxPassages) { return EMPTY_PASSAGE; } - // TODO: use FieldHighlighter::highlightOffsetsEnums and modify BoundedBreakIteratorScanner to work with it - // LUCENE-9093 modified how FieldHighlighter breaks texts into passages, - // which doesn't work well with BoundedBreakIteratorScanner - // This is the copy of highlightOffsetsEnums before LUCENE-9093. @Override protected Passage[] highlightOffsetsEnums(OffsetsEnum off) throws IOException { - if (queryMaxAnalyzedOffset != null) { off = new LimitedOffsetsEnum(off, queryMaxAnalyzedOffset); } - - final int contentLength = this.breakIterator.getText().getEndIndex(); - - if (off.nextPosition() == false) { - return new Passage[0]; - } - - PriorityQueue passageQueue = new PriorityQueue<>(Math.min(64, maxPassages + 1), (left, right) -> { - if (left.getScore() < right.getScore()) { - return -1; - } else if (left.getScore() > right.getScore()) { - return 1; - } else { - return left.getStartOffset() - right.getStartOffset(); - } - }); - Passage passage = new Passage(); // the current passage in-progress. Will either get reset or added to queue. - - do { - int start = off.startOffset(); - if (start == -1) { - throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight"); - } - int end = off.endOffset(); - if (start < contentLength && end > contentLength) { - continue; - } - // See if this term should be part of a new passage. - if (start >= passage.getEndOffset()) { - passage = maybeAddPassage(passageQueue, passageScorer, passage, contentLength); - // if we exceed limit, we are done - if (start >= contentLength) { - break; - } - passage.setStartOffset(Math.max(this.breakIterator.preceding(start + 1), 0)); - passage.setEndOffset(Math.min(this.breakIterator.following(start), contentLength)); - } - // Add this term to the passage. - BytesRef term = off.getTerm();// a reference; safe to refer to - assert term != null; - passage.addMatch(start, end, term, off.freq()); - } while (off.nextPosition()); - maybeAddPassage(passageQueue, passageScorer, passage, contentLength); - - Passage[] passages = passageQueue.toArray(new Passage[passageQueue.size()]); - // sort in ascending order - Arrays.sort(passages, Comparator.comparingInt(Passage::getStartOffset)); - return passages; - } - - // TODO: use FieldHighlighter::maybeAddPassage - // After removing CustomFieldHighlighter::highlightOffsetsEnums, remove this method as well. - private Passage maybeAddPassage(PriorityQueue passageQueue, PassageScorer scorer, Passage passage, int contentLength) { - if (passage.getStartOffset() == -1) { - // empty passage, we can ignore it - return passage; - } - passage.setScore(scorer.score(passage, contentLength)); - // new sentence: first add 'passage' to queue - if (passageQueue.size() == maxPassages && passage.getScore() < passageQueue.peek().getScore()) { - passage.reset(); // can't compete, just reset it - } else { - passageQueue.offer(passage); - if (passageQueue.size() > maxPassages) { - passage = passageQueue.poll(); - passage.reset(); - } else { - passage = new Passage(); - } - } - return passage; + return super.highlightOffsetsEnums(off); } } diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index ca5d50ba10e89..600cba528dc84 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -8,7 +8,6 @@ package org.elasticsearch.lucene.search.uhighlight; -import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.Term; import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper; @@ -16,18 +15,16 @@ import org.apache.lucene.queries.spans.SpanOrQuery; import org.apache.lucene.queries.spans.SpanQuery; import org.apache.lucene.queries.spans.SpanTermQuery; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryVisitor; +import org.apache.lucene.search.uhighlight.FieldHighlighter; import org.apache.lucene.search.uhighlight.FieldOffsetStrategy; -import org.apache.lucene.search.uhighlight.LabelledCharArrayMatcher; import org.apache.lucene.search.uhighlight.NoOpOffsetStrategy; import org.apache.lucene.search.uhighlight.PassageFormatter; -import org.apache.lucene.search.uhighlight.PhraseHelper; -import org.apache.lucene.search.uhighlight.SplittingBreakIterator; -import org.apache.lucene.search.uhighlight.UHComponents; +import org.apache.lucene.search.uhighlight.PassageScorer; import org.apache.lucene.search.uhighlight.UnifiedHighlighter; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.core.Nullable; @@ -38,10 +35,7 @@ import java.text.BreakIterator; import java.util.Collection; import java.util.Collections; -import java.util.EnumSet; import java.util.Locale; -import java.util.Set; -import java.util.function.Predicate; import static org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder.MAX_ANALYZED_OFFSET_FIELD; @@ -58,8 +52,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { private static final Snippet[] EMPTY_SNIPPET = new Snippet[0]; private final OffsetSource offsetSource; - private final PassageFormatter passageFormatter; - private final BreakIterator breakIterator; private final String index; private final String field; private final Locale breakIteratorLocale; @@ -71,51 +63,42 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { /** * Creates a new instance of {@link CustomUnifiedHighlighter} * - * @param analyzer the analyzer used for the field at index time, used for multi term queries internally. + * @param builder the {@link UnifiedHighlighter.Builder} for the underlying highlighter. * @param offsetSource the {@link OffsetSource} to used for offsets retrieval. - * @param passageFormatter our own {@link CustomPassageFormatter} - * which generates snippets in forms of {@link Snippet} objects. * @param breakIteratorLocale the {@link Locale} to use for dividing text into passages. * If null {@link Locale#ROOT} is used. - * @param breakIterator the {@link BreakIterator} to use for dividing text into passages. - * If null {@link BreakIterator#getSentenceInstance(Locale)} is used. * @param index the index we're highlighting, mostly used for error messages * @param field the name of the field we're highlighting * @param query the query we're highlighting * @param noMatchSize The size of the text that should be returned when no highlighting can be performed. * @param maxPassages the maximum number of passes to highlight - * @param fieldMatcher decides which terms should be highlighted * @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED * offset source for it because it'd be super slow */ public CustomUnifiedHighlighter( - IndexSearcher searcher, - Analyzer analyzer, + Builder builder, OffsetSource offsetSource, - PassageFormatter passageFormatter, @Nullable Locale breakIteratorLocale, - @Nullable BreakIterator breakIterator, String index, String field, Query query, int noMatchSize, int maxPassages, - Predicate fieldMatcher, int maxAnalyzedOffset, Integer queryMaxAnalyzedOffset ) throws IOException { - super(searcher, analyzer); + super(builder); this.offsetSource = offsetSource; - this.breakIterator = breakIterator; this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale; - this.passageFormatter = passageFormatter; this.index = index; this.field = field; this.noMatchSize = noMatchSize; - this.setFieldMatcher(fieldMatcher); this.maxAnalyzedOffset = maxAnalyzedOffset; this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset; - fieldHighlighter = getFieldHighlighter(field, query, extractTerms(query), maxPassages); + if (canExtractWeightMatches(query)) { + getFlags(field).remove(HighlightFlag.WEIGHT_MATCHES); + } + fieldHighlighter = (CustomFieldHighlighter) getFieldHighlighter(field, query, extractTerms(query), maxPassages); } /** @@ -132,7 +115,7 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier maxAnalyzedOffset) - && (offsetSource == OffsetSource.ANALYSIS) + && (getOffsetSource(field) == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset))) { throw new IllegalArgumentException( "The length [" @@ -160,33 +143,18 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier allTerms, int maxPassages) { - Predicate fieldMatcher = getFieldMatcher(field); - BytesRef[] terms = filterExtractedTerms(fieldMatcher, allTerms); - Set highlightFlags = getFlags(field); - PhraseHelper phraseHelper = getPhraseHelper(field, query, highlightFlags); - LabelledCharArrayMatcher[] automata = getAutomata(field, query, highlightFlags); - UHComponents components = new UHComponents(field, fieldMatcher, query, terms, phraseHelper, automata, false, highlightFlags); - OffsetSource offsetSource = getOptimizedOffsetSource(components); - BreakIterator breakIterator = new SplittingBreakIterator(getBreakIterator(field), UnifiedHighlighter.MULTIVAL_SEP_CHAR); - FieldOffsetStrategy strategy = getOffsetStrategy(offsetSource, components); + protected FieldHighlighter newFieldHighlighter( + String field, + FieldOffsetStrategy fieldOffsetStrategy, + BreakIterator breakIterator, + PassageScorer passageScorer, + int maxPassages, + int maxNoHighlightPassages, + PassageFormatter passageFormatter + ) { return new CustomFieldHighlighter( field, - strategy, + fieldOffsetStrategy, breakIteratorLocale, breakIterator, getScorer(field), @@ -198,30 +166,8 @@ protected CustomFieldHighlighter getFieldHighlighter(String field, Query query, ); } - @Override - protected Set getFlags(String field) { - Set highlightFlags = EnumSet.noneOf(HighlightFlag.class); - if (shouldHandleMultiTermQuery(field)) { - highlightFlags.add(HighlightFlag.MULTI_TERM_QUERY); - } - if (shouldHighlightPhrasesStrictly(field)) { - highlightFlags.add(HighlightFlag.PHRASES); - } - if (shouldPreferPassageRelevancyOverSpeed(field)) { - highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED); - } - return highlightFlags; - } - @Override protected Collection preSpanQueryRewrite(Query query) { - return rewriteCustomQuery(query); - } - - /** - * Translate custom queries in queries that are supported by the unified highlighter. - */ - private static Collection rewriteCustomQuery(Query query) { if (query instanceof MultiPhrasePrefixQuery mpq) { Term[][] terms = mpq.getTerms(); int[] positions = mpq.getPositions(); @@ -272,4 +218,43 @@ protected OffsetSource getOffsetSource(String field) { } return offsetSource; } + + /** + * Returns true if the provided {@link Query} is not compatible with the {@link HighlightFlag#WEIGHT_MATCHES} + * mode of this highlighter. + * + * @param query The query to highlight + */ + private boolean canExtractWeightMatches(Query query) { + + boolean[] hasUnknownLeaf = new boolean[1]; + query.visit(new QueryVisitor() { + + @Override + public void visitLeaf(Query leafQuery) { + /** + * The parent-child query requires to load global ordinals and to access + * documents outside of the scope of the highlighted doc. + * We disable the {@link HighlightFlag#WEIGHT_MATCHES} mode in this case + * in order to preserve the compatibility. + */ + if (leafQuery.getClass().getSimpleName().equals("LateParsingQuery")) { + hasUnknownLeaf[0] = true; + } + super.visitLeaf(query); + } + + @Override + public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { + /** + * Disable nested query support. + */ + if (parent instanceof ESToParentBlockJoinQuery) { + hasUnknownLeaf[0] = true; + } + return super.getSubVisitor(occur, parent); + } + }); + return hasUnknownLeaf[0]; + } } diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index 478138c366f6d..a50d7c5e1f99e 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -217,11 +217,11 @@ import org.elasticsearch.search.fetch.subphase.ScriptFieldsPhase; import org.elasticsearch.search.fetch.subphase.SeqNoPrimaryTermPhase; import org.elasticsearch.search.fetch.subphase.StoredFieldsPhase; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.FastVectorHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.HighlightPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.rescore.QueryRescorerBuilder; import org.elasticsearch.search.rescore.RescorerBuilder; @@ -865,7 +865,7 @@ private static Map setupHighlighters(Settings settings, Lis NamedRegistry highlighters = new NamedRegistry<>("highlighter"); highlighters.register("fvh", new FastVectorHighlighter(settings)); highlighters.register("plain", new PlainHighlighter()); - highlighters.register("unified", new UnifiedHighlighter()); + highlighters.register("unified", new DefaultHighlighter()); highlighters.extractAndRegister(plugins, SearchPlugin::getHighlighters); return unmodifiableMap(highlighters.getRegistry()); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java similarity index 91% rename from server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java rename to server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index e880f82f10953..d76ba683cf901 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -12,6 +12,8 @@ import org.apache.lucene.search.highlight.Encoder; import org.apache.lucene.search.uhighlight.CustomSeparatorBreakIterator; import org.apache.lucene.search.uhighlight.PassageFormatter; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter.Builder; import org.apache.lucene.search.uhighlight.UnifiedHighlighter.OffsetSource; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CollectionUtil; @@ -42,7 +44,7 @@ import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; -public class UnifiedHighlighter implements Highlighter { +public class DefaultHighlighter implements Highlighter { @Override public boolean canHighlight(MappedFieldType fieldType) { return true; @@ -52,7 +54,7 @@ public boolean canHighlight(MappedFieldType fieldType) { public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException { @SuppressWarnings("unchecked") Map cache = (Map) fieldContext.cache.computeIfAbsent( - UnifiedHighlighter.class.getName(), + DefaultHighlighter.class.getName(), k -> new HashMap<>() ); if (cache.containsKey(fieldContext.fieldName) == false) { @@ -107,6 +109,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; + int maxAnalyzedOffset = fieldContext.context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset(); int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments(); Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset(); @@ -118,7 +121,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th IndexSearcher searcher = fieldContext.context.searcher(); OffsetSource offsetSource = getOffsetSource(fieldContext.context, fieldContext.fieldType); BreakIterator breakIterator; - int higlighterNumberOfFragments; + int highlighterNumberOfFragments; if (numberOfFragments == 0 // non-tokenized fields should not use any break iterator (ignore boundaryScannerType) || fieldContext.fieldType.getTextSearchInfo().isTokenized() == false) { @@ -129,25 +132,25 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th * values of a field and we get back a snippet per value */ breakIterator = new CustomSeparatorBreakIterator(MULTIVAL_SEP_CHAR); - higlighterNumberOfFragments = numberOfFragments == 0 ? Integer.MAX_VALUE - 1 : numberOfFragments; + highlighterNumberOfFragments = numberOfFragments == 0 ? Integer.MAX_VALUE - 1 : numberOfFragments; } else { // using paragraph separator we make sure that each field value holds a discrete passage for highlighting breakIterator = getBreakIterator(fieldContext.field); - higlighterNumberOfFragments = numberOfFragments; + highlighterNumberOfFragments = numberOfFragments; } + Builder builder = UnifiedHighlighter.builder(searcher, analyzer); + builder.withBreakIterator(() -> breakIterator); + builder.withFieldMatcher(fieldMatcher(fieldContext)); + builder.withFormatter(passageFormatter); return new CustomUnifiedHighlighter( - searcher, - analyzer, + builder, offsetSource, - passageFormatter, fieldContext.field.fieldOptions().boundaryScannerLocale(), - breakIterator, fieldContext.context.getIndexName(), fieldContext.fieldName, fieldContext.query, fieldContext.field.fieldOptions().noMatchSize(), - higlighterNumberOfFragments, - fieldMatcher(fieldContext), + highlighterNumberOfFragments, maxAnalyzedOffset, fieldContext.field.fieldOptions().maxAnalyzedOffset() ); @@ -184,16 +187,17 @@ protected static BreakIterator getBreakIterator(SearchHighlightContext.Field fie : HighlightBuilder.BoundaryScannerType.SENTENCE; int maxLen = fieldOptions.fragmentCharSize(); switch (type) { - case SENTENCE: + case SENTENCE -> { if (maxLen > 0) { return BoundedBreakIteratorScanner.getSentence(locale, maxLen); } return BreakIterator.getSentenceInstance(locale); - case WORD: + } + case WORD -> { // ignore maxLen return BreakIterator.getWordInstance(locale); - default: - throw new IllegalArgumentException("Invalid boundary scanner type: " + type); + } + default -> throw new IllegalArgumentException("Invalid boundary scanner type: " + type); } } diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index 475333ed5a012..e7fa0e67cb453 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -37,7 +37,7 @@ import java.util.Map; import static org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder.MAX_ANALYZED_OFFSET_FIELD; -import static org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter.convertFieldValue; +import static org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter.convertFieldValue; public class PlainHighlighter implements Highlighter { private static final String CACHE_KEY = "highlight-plain"; diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 74d70f179697b..b7c9dd14cde26 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -134,19 +134,19 @@ private void assertHighlightOneDoc( TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER); assertThat(topDocs.totalHits.value, equalTo(1L)); String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR)); + UnifiedHighlighter.Builder builder = UnifiedHighlighter.builder(searcher, analyzer); + builder.withBreakIterator(() -> breakIterator); + builder.withFieldMatcher(name -> "text".equals(name)); + builder.withFormatter(new CustomPassageFormatter("", "", new DefaultEncoder())); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter( - searcher, - analyzer, + builder, offsetSource, - new CustomPassageFormatter("", "", new DefaultEncoder()), locale, - breakIterator, "index", "text", query, noMatchSize, expectedPassages.length, - name -> "text".equals(name), maxAnalyzedOffset, queryMaxAnalyzedOffset ); @@ -218,7 +218,7 @@ public void testMultiPhrasePrefixQuerySingleTerm() throws Exception { public void testMultiPhrasePrefixQuery() throws Exception { final String[] inputs = { "The quick brown fox." }; - final String[] outputs = { "The quick brown fox." }; + final String[] outputs = { "The quick brown fox." }; MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery("text"); query.add(new Term("text", "quick")); query.add(new Term("text", "brown")); @@ -241,7 +241,7 @@ public void testSentenceBoundedBreakIterator() throws Exception { final String[] outputs = { "The quick brown", "fox in a long", - "with another quick", + "another quick", "brown fox.", "sentence with brown", "fox.", }; @@ -277,7 +277,7 @@ public void testSmallSentenceBoundedBreakIterator() throws Exception { ); } - public void testRepeat() throws Exception { + public void testRepeatTerm() throws Exception { final String[] inputs = { "Fun fun fun fun fun fun fun fun fun fun" }; final String[] outputs = { "Fun fun fun", @@ -295,8 +295,12 @@ public void testRepeat() throws Exception { 0, outputs ); + } - query = new PhraseQuery.Builder().add(new Term("text", "fun")).add(new Term("text", "fun")).build(); + public void testRepeatPhrase() throws Exception { + final String[] inputs = { "Fun fun fun fun fun fun fun fun fun fun" }; + final String[] outputs = { "Fun fun fun", "fun fun ", "fun fun fun", "fun fun" }; + Query query = new PhraseQuery.Builder().add(new Term("text", "fun")).add(new Term("text", "fun")).build(); assertHighlightOneDoc( "text", inputs, diff --git a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java index 42f14bd1c15c0..46c186e656b6b 100644 --- a/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java +++ b/server/src/test/java/org/elasticsearch/search/SearchModuleTests.java @@ -46,10 +46,10 @@ import org.elasticsearch.search.fetch.FetchSubPhase; import org.elasticsearch.search.fetch.subphase.ExplainPhase; import org.elasticsearch.search.fetch.subphase.highlight.CustomHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.FastVectorHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; import org.elasticsearch.search.internal.ShardSearchRequest; import org.elasticsearch.search.rescore.QueryRescorerBuilder; import org.elasticsearch.search.rescore.RescoreContext; @@ -298,7 +298,7 @@ public Map getHighlighters() { Map highlighters = module.getHighlighters(); assertEquals(FastVectorHighlighter.class, highlighters.get("fvh").getClass()); assertEquals(PlainHighlighter.class, highlighters.get("plain").getClass()); - assertEquals(UnifiedHighlighter.class, highlighters.get("unified").getClass()); + assertEquals(DefaultHighlighter.class, highlighters.get("unified").getClass()); assertSame(highlighters.get("custom"), customHighlighter); } diff --git a/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java index f86572f560865..9575a94b8042e 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java @@ -28,7 +28,7 @@ import org.elasticsearch.search.fetch.subphase.highlight.HighlightPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.UnifiedHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.lookup.Source; import java.io.IOException; @@ -47,7 +47,7 @@ public class HighlighterTestCase extends MapperServiceTestCase { protected Map getHighlighters() { return Map.of( "unified", - new UnifiedHighlighter(), + new DefaultHighlighter(), "fvh", new FastVectorHighlighter(getIndexSettings()), "plain", From 86def3dccc2898db7429f0d8f2bc5628627e90ca Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 12 May 2023 16:16:14 +0100 Subject: [PATCH 02/15] fix style --- .../index/mapper/annotatedtext/AnnotatedTextHighlighter.java | 2 +- .../org/elasticsearch/search/fetch/HighlighterTestCase.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java index 768d907833380..45c2a9208b8d6 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighter.java @@ -17,8 +17,8 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; import org.elasticsearch.search.fetch.FetchSubPhase.HitContext; -import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; +import org.elasticsearch.search.fetch.subphase.highlight.SearchHighlightContext; import java.io.IOException; import java.util.ArrayList; diff --git a/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java index 9575a94b8042e..6911fda2b44c9 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/fetch/HighlighterTestCase.java @@ -23,12 +23,12 @@ import org.elasticsearch.index.query.SearchExecutionContext; import org.elasticsearch.search.SearchHit; import org.elasticsearch.search.builder.SearchSourceBuilder; +import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.FastVectorHighlighter; import org.elasticsearch.search.fetch.subphase.highlight.HighlightField; import org.elasticsearch.search.fetch.subphase.highlight.HighlightPhase; import org.elasticsearch.search.fetch.subphase.highlight.Highlighter; import org.elasticsearch.search.fetch.subphase.highlight.PlainHighlighter; -import org.elasticsearch.search.fetch.subphase.highlight.DefaultHighlighter; import org.elasticsearch.search.lookup.Source; import java.io.IOException; From 16013698ce7cd9e6e0d3bd53e7f1eaaa050e692a Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 12 May 2023 16:20:29 +0100 Subject: [PATCH 03/15] address review comment --- .../java/org/elasticsearch/join/query/ChildQuerySearchIT.java | 2 +- .../lucene/search/uhighlight/CustomUnifiedHighlighter.java | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java b/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java index c1ec58939e800..2821c38a6b248 100644 --- a/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java +++ b/modules/parent-join/src/internalClusterTest/java/org/elasticsearch/join/query/ChildQuerySearchIT.java @@ -1743,7 +1743,7 @@ public void testHighlightersIgnoreParentChild() throws IOException { createIndexRequest("test", "child-type", "child-id", "parent-id", "searchText", "quick brown fox").get(); refresh(); - String[] highlightTypes = new String[] { "unified" }; + String[] highlightTypes = new String[] { "plain", "fvh", "unified" }; for (String highlightType : highlightTypes) { logger.info("Testing with highlight type [{}]", highlightType); SearchResponse searchResponse = client().prepareSearch("test") diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 600cba528dc84..9c0df03889c38 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -95,7 +95,7 @@ public CustomUnifiedHighlighter( this.noMatchSize = noMatchSize; this.maxAnalyzedOffset = maxAnalyzedOffset; this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset; - if (canExtractWeightMatches(query)) { + if (weightMatchesUnsupported(query)) { getFlags(field).remove(HighlightFlag.WEIGHT_MATCHES); } fieldHighlighter = (CustomFieldHighlighter) getFieldHighlighter(field, query, extractTerms(query), maxPassages); @@ -225,7 +225,7 @@ protected OffsetSource getOffsetSource(String field) { * * @param query The query to highlight */ - private boolean canExtractWeightMatches(Query query) { + private boolean weightMatchesUnsupported(Query query) { boolean[] hasUnknownLeaf = new boolean[1]; query.visit(new QueryVisitor() { From b47ba6943fe5fab3350c3816aac7727a89e0dd9d Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 12 May 2023 16:29:30 +0100 Subject: [PATCH 04/15] fix module compil --- .../lucene/search/uhighlight/CustomUnifiedHighlighter.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 9c0df03889c38..e84a3788a6d04 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -142,6 +142,10 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier Date: Fri, 12 May 2023 17:21:10 +0100 Subject: [PATCH 05/15] disable matches mode for runtime fields and when requireFieldMatch is false --- .../AnnotatedTextHighlighterTests.java | 3 +- .../uhighlight/CustomUnifiedHighlighter.java | 34 ++++++++++++++++--- .../highlight/DefaultHighlighter.java | 3 +- .../CustomUnifiedHighlighterTests.java | 3 +- 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java index 3e2891f1594c7..d56f6c6366d04 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java @@ -145,7 +145,8 @@ private void assertHighlightOneDoc( noMatchSize, expectedPassages.length, maxAnalyzedOffset, - queryMaxAnalyzedOffset + queryMaxAnalyzedOffset, + true ); highlighter.setFieldMatcher((name) -> "text".equals(name)); final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue); diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index e84a3788a6d04..450cce18896a3 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -25,17 +25,20 @@ import org.apache.lucene.search.uhighlight.PassageFormatter; import org.apache.lucene.search.uhighlight.PassageScorer; import org.apache.lucene.search.uhighlight.UnifiedHighlighter; +import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.search.ESToParentBlockJoinQuery; +import org.elasticsearch.search.runtime.AbstractScriptFieldQuery; import java.io.IOException; import java.text.BreakIterator; import java.util.Collection; import java.util.Collections; import java.util.Locale; +import java.util.function.Supplier; import static org.elasticsearch.search.fetch.subphase.highlight.AbstractHighlighterBuilder.MAX_ANALYZED_OFFSET_FIELD; @@ -85,7 +88,8 @@ public CustomUnifiedHighlighter( int noMatchSize, int maxPassages, int maxAnalyzedOffset, - Integer queryMaxAnalyzedOffset + Integer queryMaxAnalyzedOffset, + boolean requireFieldMatch ) throws IOException { super(builder); this.offsetSource = offsetSource; @@ -95,7 +99,7 @@ public CustomUnifiedHighlighter( this.noMatchSize = noMatchSize; this.maxAnalyzedOffset = maxAnalyzedOffset; this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset; - if (weightMatchesUnsupported(query)) { + if (requireFieldMatch == false || weightMatchesUnsupported(query)) { getFlags(field).remove(HighlightFlag.WEIGHT_MATCHES); } fieldHighlighter = (CustomFieldHighlighter) getFieldHighlighter(field, query, extractTerms(query), maxPassages); @@ -230,10 +234,8 @@ protected OffsetSource getOffsetSource(String field) { * @param query The query to highlight */ private boolean weightMatchesUnsupported(Query query) { - boolean[] hasUnknownLeaf = new boolean[1]; query.visit(new QueryVisitor() { - @Override public void visitLeaf(Query leafQuery) { /** @@ -248,10 +250,32 @@ public void visitLeaf(Query leafQuery) { super.visitLeaf(query); } + @Override + public void consumeTerms(Query leafQuery, Term... terms) { + if (leafQuery instanceof AbstractScriptFieldQuery) { + /** + * Queries on runtime fields don't support the matches API. + */ + hasUnknownLeaf[0] = true; + } + super.consumeTerms(query, terms); + } + + @Override + public void consumeTermsMatching(Query leafQuery, String field, Supplier automaton) { + if (leafQuery instanceof AbstractScriptFieldQuery) { + /** + * Queries on runtime fields don't support the matches API. + */ + hasUnknownLeaf[0] = true; + } + super.consumeTermsMatching(query, field, automaton); + } + @Override public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) { /** - * Disable nested query support. + * Nested queries don't support the matches API. */ if (parent instanceof ESToParentBlockJoinQuery) { hasUnknownLeaf[0] = true; diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index d76ba683cf901..18b5cf5e625c6 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -152,7 +152,8 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th fieldContext.field.fieldOptions().noMatchSize(), highlighterNumberOfFragments, maxAnalyzedOffset, - fieldContext.field.fieldOptions().maxAnalyzedOffset() + fieldContext.field.fieldOptions().maxAnalyzedOffset(), + fieldContext.field.fieldOptions().requireFieldMatch() ); } diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index b7c9dd14cde26..265a48ac190a0 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -148,7 +148,8 @@ private void assertHighlightOneDoc( noMatchSize, expectedPassages.length, maxAnalyzedOffset, - queryMaxAnalyzedOffset + queryMaxAnalyzedOffset, + true ); final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue); assertEquals(snippets.length, expectedPassages.length); From de0659c84990fe0ed6564e6f579107b232d6d4b2 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 12 May 2023 18:18:37 +0100 Subject: [PATCH 06/15] fix more yml test --- .../test/search.highlight/50_synthetic_source.yml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/50_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/50_synthetic_source.yml index 8f2333605b31f..2b75b48d8a09c 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/50_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/50_synthetic_source.yml @@ -210,7 +210,7 @@ text single unified from reanalysis: fields: foo.analyze: type: unified - - match: { hits.hits.0.highlight.foo\.analyze.0: the quick brown fox jumped over the lazy dog } + - match: { hits.hits.0.highlight.foo\.analyze.0: the quick brown fox jumped over the lazy dog } --- text multi unified from reanalysis: @@ -225,7 +225,7 @@ text multi unified from reanalysis: fields: foo.analyze: type: unified - - match: { hits.hits.0.highlight.foo\.analyze.0: That makes calamity of so long life. } + - match: { hits.hits.0.highlight.foo\.analyze.0: That makes calamity of so long life. } --- text single unified from positions: @@ -240,7 +240,7 @@ text single unified from positions: fields: foo.positions: type: unified - - match: { hits.hits.0.highlight.foo\.positions.0: the quick brown fox jumped over the lazy dog } + - match: { hits.hits.0.highlight.foo\.positions.0: the quick brown fox jumped over the lazy dog } --- text multi unified from positions: @@ -255,7 +255,7 @@ text multi unified from positions: fields: foo.positions: type: unified - - match: { hits.hits.0.highlight.foo\.positions.0: That makes calamity of so long life. } + - match: { hits.hits.0.highlight.foo\.positions.0: That makes calamity of so long life. } --- text single unified from vectors: @@ -270,7 +270,7 @@ text single unified from vectors: fields: foo.vectors: type: unified - - match: { hits.hits.0.highlight.foo\.vectors.0: the quick brown fox jumped over the lazy dog } + - match: { hits.hits.0.highlight.foo\.vectors.0: the quick brown fox jumped over the lazy dog } --- text multi unified from vectors: @@ -285,7 +285,7 @@ text multi unified from vectors: fields: foo.vectors: type: unified - - match: { hits.hits.0.highlight.foo\.vectors.0: That makes calamity of so long life. } + - match: { hits.hits.0.highlight.foo\.vectors.0: That makes calamity of so long life. } --- text single fvh source order: From b7969c4047f237ea0381bfd899808bcaf8f5b339 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 11 Jul 2023 14:36:57 -0400 Subject: [PATCH 07/15] add a setting to disable weight matches mode --- .../AnnotatedTextHighlighterTests.java | 2 ++ .../common/settings/ClusterSettings.java | 2 ++ .../uhighlight/CustomUnifiedHighlighter.java | 20 ++++++++++++++++--- .../highlight/DefaultHighlighter.java | 19 ++++++------------ .../CustomUnifiedHighlighterTests.java | 2 ++ 5 files changed, 29 insertions(+), 16 deletions(-) diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java index d56f6c6366d04..a84883c6fec8b 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java @@ -34,6 +34,7 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper; @@ -136,6 +137,7 @@ private void assertHighlightOneDoc( builder.withFieldMatcher(name -> "text".equals(name)); builder.withFormatter(passageFormatter); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter( + Settings.EMPTY, builder, UnifiedHighlighter.OffsetSource.ANALYSIS, locale, diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index e21850d60f055..8f4421d0d4d38 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -93,6 +93,7 @@ import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.indices.store.IndicesStore; import org.elasticsearch.ingest.IngestSettings; +import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; import org.elasticsearch.monitor.fs.FsHealthService; import org.elasticsearch.monitor.fs.FsService; import org.elasticsearch.monitor.jvm.JvmGcMonitorService; @@ -506,6 +507,7 @@ public void apply(Settings value, Settings current, Settings previous) { ThreadPool.LATE_TIME_INTERVAL_WARN_THRESHOLD_SETTING, ThreadPool.SLOW_SCHEDULER_TASK_WARN_THRESHOLD_SETTING, FastVectorHighlighter.SETTING_TV_HIGHLIGHT_MULTI_VALUE, + CustomUnifiedHighlighter.WEIGHT_MATCHES_MODE_ENABLE_SETTING, Node.BREAKER_TYPE_KEY, OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING, IndexGraveyard.SETTING_MAX_TOMBSTONES, diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 450cce18896a3..8a56317508ab5 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -28,6 +28,8 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.search.ESToParentBlockJoinQuery; @@ -51,6 +53,15 @@ * Supports both returning empty snippets and non highlighted snippets when no highlighting can be performed. */ public class CustomUnifiedHighlighter extends UnifiedHighlighter { + /** + * A cluster setting to enable/disable the {@link HighlightFlag#WEIGHT_MATCHES} mode of the unified highlighter. + */ + public static final Setting WEIGHT_MATCHES_MODE_ENABLE_SETTING = Setting.boolSetting( + "search.highlight.weight_matches_mode.enabled", + true, + Setting.Property.NodeScope + ); + public static final char MULTIVAL_SEP_CHAR = (char) 0; private static final Snippet[] EMPTY_SNIPPET = new Snippet[0]; @@ -66,6 +77,7 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { /** * Creates a new instance of {@link CustomUnifiedHighlighter} * + * @param settings the {@link Settings} of the node. * @param builder the {@link UnifiedHighlighter.Builder} for the underlying highlighter. * @param offsetSource the {@link OffsetSource} to used for offsets retrieval. * @param breakIteratorLocale the {@link Locale} to use for dividing text into passages. @@ -79,6 +91,7 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { * offset source for it because it'd be super slow */ public CustomUnifiedHighlighter( + Settings settings, Builder builder, OffsetSource offsetSource, @Nullable Locale breakIteratorLocale, @@ -90,7 +103,7 @@ public CustomUnifiedHighlighter( int maxAnalyzedOffset, Integer queryMaxAnalyzedOffset, boolean requireFieldMatch - ) throws IOException { + ) { super(builder); this.offsetSource = offsetSource; this.breakIteratorLocale = breakIteratorLocale == null ? Locale.ROOT : breakIteratorLocale; @@ -99,7 +112,8 @@ public CustomUnifiedHighlighter( this.noMatchSize = noMatchSize; this.maxAnalyzedOffset = maxAnalyzedOffset; this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset; - if (requireFieldMatch == false || weightMatchesUnsupported(query)) { + if (WEIGHT_MATCHES_MODE_ENABLE_SETTING.get(settings) == false || + requireFieldMatch == false || weightMatchesUnsupported(query)) { getFlags(field).remove(HighlightFlag.WEIGHT_MATCHES); } fieldHighlighter = (CustomFieldHighlighter) getFieldHighlighter(field, query, extractTerms(query), maxPassages); @@ -136,7 +150,7 @@ public Snippet[] highlightField(LeafReader reader, int docId, CheckedSupplier cache = (Map) fieldContext.cache.computeIfAbsent( - DefaultHighlighter.class.getName(), - k -> new HashMap<>() - ); - if (cache.containsKey(fieldContext.fieldName) == false) { - cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); - } - CustomUnifiedHighlighter highlighter = cache.get(fieldContext.fieldName); + CustomUnifiedHighlighter highlighter = buildHighlighter(fieldContext); MappedFieldType fieldType = fieldContext.fieldType; SearchHighlightContext.Field field = fieldContext.field; FetchSubPhase.HitContext hitContext = fieldContext.hitContext; @@ -105,12 +96,13 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc return new HighlightField(fieldContext.fieldName, Text.convertFromStringArray(fragments)); } - CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) throws IOException { + CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { + IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings(); Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT; - int maxAnalyzedOffset = fieldContext.context.getSearchExecutionContext().getIndexSettings().getHighlightMaxAnalyzedOffset(); + int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset(); int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments(); Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset(); Analyzer analyzer = wrapAnalyzer( @@ -143,6 +135,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) th builder.withFieldMatcher(fieldMatcher(fieldContext)); builder.withFormatter(passageFormatter); return new CustomUnifiedHighlighter( + indexSettings.getSettings(), builder, offsetSource, fieldContext.field.fieldOptions().boundaryScannerLocale(), diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 265a48ac190a0..c27b7996112fc 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -37,6 +37,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; import java.text.BreakIterator; @@ -139,6 +140,7 @@ private void assertHighlightOneDoc( builder.withFieldMatcher(name -> "text".equals(name)); builder.withFormatter(new CustomPassageFormatter("", "", new DefaultEncoder())); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter( + Settings.EMPTY, builder, offsetSource, locale, From e48cabd487fb73f928abffef15b8e4349d48699c Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Wed, 12 Jul 2023 10:12:30 -0400 Subject: [PATCH 08/15] fix style --- .../search/uhighlight/CustomUnifiedHighlighter.java | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 8a56317508ab5..881ec433cf1c2 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -57,9 +57,9 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { * A cluster setting to enable/disable the {@link HighlightFlag#WEIGHT_MATCHES} mode of the unified highlighter. */ public static final Setting WEIGHT_MATCHES_MODE_ENABLE_SETTING = Setting.boolSetting( - "search.highlight.weight_matches_mode.enabled", - true, - Setting.Property.NodeScope + "search.highlight.weight_matches_mode.enabled", + true, + Setting.Property.NodeScope ); public static final char MULTIVAL_SEP_CHAR = (char) 0; @@ -112,8 +112,7 @@ public CustomUnifiedHighlighter( this.noMatchSize = noMatchSize; this.maxAnalyzedOffset = maxAnalyzedOffset; this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset; - if (WEIGHT_MATCHES_MODE_ENABLE_SETTING.get(settings) == false || - requireFieldMatch == false || weightMatchesUnsupported(query)) { + if (WEIGHT_MATCHES_MODE_ENABLE_SETTING.get(settings) == false || requireFieldMatch == false || weightMatchesUnsupported(query)) { getFlags(field).remove(HighlightFlag.WEIGHT_MATCHES); } fieldHighlighter = (CustomFieldHighlighter) getFieldHighlighter(field, query, extractTerms(query), maxPassages); From 966ce17f634d90745da7b4a3dfaab511f302f880 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 13:14:24 +0900 Subject: [PATCH 09/15] move the new setting to a dynamic index setting --- .../AnnotatedTextHighlighterTests.java | 2 +- .../test/search.highlight/10_unified.yml | 44 +++++++++++++++++++ .../common/settings/ClusterSettings.java | 1 - .../common/settings/IndexScopedSettings.java | 1 + .../elasticsearch/index/IndexSettings.java | 23 ++++++++++ .../uhighlight/CustomUnifiedHighlighter.java | 18 ++------ .../highlight/DefaultHighlighter.java | 5 ++- .../CustomUnifiedHighlighterTests.java | 2 +- .../action/TransportResumeFollowAction.java | 1 + 9 files changed, 78 insertions(+), 19 deletions(-) diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java index a84883c6fec8b..af68017c5bfea 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java @@ -137,7 +137,6 @@ private void assertHighlightOneDoc( builder.withFieldMatcher(name -> "text".equals(name)); builder.withFormatter(passageFormatter); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter( - Settings.EMPTY, builder, UnifiedHighlighter.OffsetSource.ANALYSIS, locale, @@ -148,6 +147,7 @@ private void assertHighlightOneDoc( expectedPassages.length, maxAnalyzedOffset, queryMaxAnalyzedOffset, + true, true ); highlighter.setFieldMatcher((name) -> "text".equals(name)); diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml index 3916386abc244..1a03896f6d087 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.highlight/10_unified.yml @@ -23,6 +23,18 @@ setup: - do: indices.refresh: {} +--- +teardown: + - skip: + version: " - 8.9.99" + reason: "setting added in 8.10.0" + + - do: + indices.put_settings: + index: test + body: + index.highlight.weight_matches_mode.enabled: null + --- "Basic multi_match query": - do: @@ -49,3 +61,35 @@ setup: - match: {hits.hits.0.highlight.text.0: "The quick brown fox is brown."} - match: {hits.hits.0.highlight.text\.fvh.0: "The quick brown fox is brown."} - match: {hits.hits.0.highlight.text\.postings.0: "The quick brown fox is brown."} + +--- +"Disable weight matches": + - skip: + version: " - 8.9.99" + reason: "support for weight matches was added in 8.10" + + - do: + search: + body: { + "query": { "multi_match": { "query": "quick brown fox", "type": "phrase", "fields": [ "text*" ] } }, + "highlight": { "type": "unified", "fields": { "*": { } } } } + + - match: { hits.hits.0.highlight.text.0: "The quick brown fox is brown." } + - match: { hits.hits.0.highlight.text\.fvh.0: "The quick brown fox is brown." } + - match: { hits.hits.0.highlight.text\.postings.0: "The quick brown fox is brown." } + + - do: + indices.put_settings: + index: test + body: + index.highlight.weight_matches_mode.enabled: "false" + + - do: + search: + body: { + "query" : { "multi_match" : { "query" : "quick brown fox", "type": "phrase", "fields" : [ "text*"] } }, + "highlight" : { "type" : "unified", "fields" : { "*" : {} } } } + + - match: {hits.hits.0.highlight.text.0: "The quick brown fox is brown."} + - match: {hits.hits.0.highlight.text\.fvh.0: "The quick brown fox is brown."} + - match: {hits.hits.0.highlight.text\.postings.0: "The quick brown fox is brown."} diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index f26493504c18e..bc5960e621ba3 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -508,7 +508,6 @@ public void apply(Settings value, Settings current, Settings previous) { ThreadPool.LATE_TIME_INTERVAL_WARN_THRESHOLD_SETTING, ThreadPool.SLOW_SCHEDULER_TASK_WARN_THRESHOLD_SETTING, FastVectorHighlighter.SETTING_TV_HIGHLIGHT_MULTI_VALUE, - CustomUnifiedHighlighter.WEIGHT_MATCHES_MODE_ENABLE_SETTING, Node.BREAKER_TYPE_KEY, OperationRouting.USE_ADAPTIVE_REPLICA_SELECTION_SETTING, IndexGraveyard.SETTING_MAX_TOMBSTONES, diff --git a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java index 5dabfb5a6314f..01e4824579b90 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/IndexScopedSettings.java @@ -114,6 +114,7 @@ public final class IndexScopedSettings extends AbstractScopedSettings { IndexSettings.MAX_SHINGLE_DIFF_SETTING, IndexSettings.MAX_RESCORE_WINDOW_SETTING, IndexSettings.MAX_ANALYZED_OFFSET_SETTING, + IndexSettings.WEIGHT_MATCHES_MODE_ENABLED_SETTING, IndexSettings.MAX_TERMS_COUNT_SETTING, IndexSettings.INDEX_TRANSLOG_SYNC_INTERVAL_SETTING, IndexSettings.DEFAULT_FIELD_SETTING, diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 6a439f92b0430..53389849cb613 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.util.Strings; import org.apache.lucene.index.MergePolicy; +import org.apache.lucene.search.uhighlight.UnifiedHighlighter; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.IndexRouting; @@ -187,6 +188,17 @@ public final class IndexSettings { Property.IndexScope ); + /** + * Index setting to enable/disable the {@link UnifiedHighlighter.HighlightFlag#WEIGHT_MATCHES} + * mode of the unified highlighter. + */ + public static final Setting WEIGHT_MATCHES_MODE_ENABLED_SETTING = Setting.boolSetting( + "index.highlight.weight_matches_mode.enabled", + true, + Property.Dynamic, + Property.IndexScope + ); + /** * Index setting describing the maximum number of terms that can be used in Terms Query. * The default maximum of 65536 terms is defensive, as extra processing and memory is involved @@ -730,6 +742,7 @@ private void setRetentionLeaseMillis(final TimeValue retentionLease) { private volatile int maxShingleDiff; private volatile TimeValue searchIdleAfter; private volatile int maxAnalyzedOffset; + private volatile boolean weightMatchesEnabled; private volatile int maxTermsCount; private volatile String defaultPipeline; private volatile String requiredPipeline; @@ -871,6 +884,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti maxRefreshListeners = scopedSettings.get(MAX_REFRESH_LISTENERS_PER_SHARD); maxSlicesPerScroll = scopedSettings.get(MAX_SLICES_PER_SCROLL); maxAnalyzedOffset = scopedSettings.get(MAX_ANALYZED_OFFSET_SETTING); + weightMatchesEnabled = scopedSettings.get(WEIGHT_MATCHES_MODE_ENABLED_SETTING); maxTermsCount = scopedSettings.get(MAX_TERMS_COUNT_SETTING); maxRegexLength = scopedSettings.get(MAX_REGEX_LENGTH_SETTING); this.mergePolicyConfig = new MergePolicyConfig(logger, this); @@ -946,6 +960,7 @@ public IndexSettings(final IndexMetadata indexMetadata, final Settings nodeSetti scopedSettings.addSettingsUpdateConsumer(INDEX_REFRESH_INTERVAL_SETTING, this::setRefreshInterval); scopedSettings.addSettingsUpdateConsumer(MAX_REFRESH_LISTENERS_PER_SHARD, this::setMaxRefreshListeners); scopedSettings.addSettingsUpdateConsumer(MAX_ANALYZED_OFFSET_SETTING, this::setHighlightMaxAnalyzedOffset); + scopedSettings.addSettingsUpdateConsumer(WEIGHT_MATCHES_MODE_ENABLED_SETTING, this::setWeightMatchesEnabled); scopedSettings.addSettingsUpdateConsumer(MAX_TERMS_COUNT_SETTING, this::setMaxTermsCount); scopedSettings.addSettingsUpdateConsumer(MAX_SLICES_PER_SCROLL, this::setMaxSlicesPerScroll); scopedSettings.addSettingsUpdateConsumer(DEFAULT_FIELD_SETTING, this::setDefaultFields); @@ -1325,6 +1340,14 @@ private void setHighlightMaxAnalyzedOffset(int maxAnalyzedOffset) { this.maxAnalyzedOffset = maxAnalyzedOffset; } + public boolean isWeightMatchesEnabled() { + return this.weightMatchesEnabled; + } + + private void setWeightMatchesEnabled(boolean value) { + this.weightMatchesEnabled = value; + } + /** * Returns the maximum number of terms that can be used in a Terms Query request */ diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 881ec433cf1c2..6266b2b888303 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -28,7 +28,6 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; -import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; @@ -53,15 +52,6 @@ * Supports both returning empty snippets and non highlighted snippets when no highlighting can be performed. */ public class CustomUnifiedHighlighter extends UnifiedHighlighter { - /** - * A cluster setting to enable/disable the {@link HighlightFlag#WEIGHT_MATCHES} mode of the unified highlighter. - */ - public static final Setting WEIGHT_MATCHES_MODE_ENABLE_SETTING = Setting.boolSetting( - "search.highlight.weight_matches_mode.enabled", - true, - Setting.Property.NodeScope - ); - public static final char MULTIVAL_SEP_CHAR = (char) 0; private static final Snippet[] EMPTY_SNIPPET = new Snippet[0]; @@ -77,7 +67,6 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { /** * Creates a new instance of {@link CustomUnifiedHighlighter} * - * @param settings the {@link Settings} of the node. * @param builder the {@link UnifiedHighlighter.Builder} for the underlying highlighter. * @param offsetSource the {@link OffsetSource} to used for offsets retrieval. * @param breakIteratorLocale the {@link Locale} to use for dividing text into passages. @@ -89,9 +78,9 @@ public class CustomUnifiedHighlighter extends UnifiedHighlighter { * @param maxPassages the maximum number of passes to highlight * @param maxAnalyzedOffset if the field is more than this long we'll refuse to use the ANALYZED * offset source for it because it'd be super slow + * @param weightMatchesEnabled whether the {@link HighlightFlag#WEIGHT_MATCHES} should be enabled */ public CustomUnifiedHighlighter( - Settings settings, Builder builder, OffsetSource offsetSource, @Nullable Locale breakIteratorLocale, @@ -102,7 +91,8 @@ public CustomUnifiedHighlighter( int maxPassages, int maxAnalyzedOffset, Integer queryMaxAnalyzedOffset, - boolean requireFieldMatch + boolean requireFieldMatch, + boolean weightMatchesEnabled ) { super(builder); this.offsetSource = offsetSource; @@ -112,7 +102,7 @@ public CustomUnifiedHighlighter( this.noMatchSize = noMatchSize; this.maxAnalyzedOffset = maxAnalyzedOffset; this.queryMaxAnalyzedOffset = queryMaxAnalyzedOffset; - if (WEIGHT_MATCHES_MODE_ENABLE_SETTING.get(settings) == false || requireFieldMatch == false || weightMatchesUnsupported(query)) { + if (weightMatchesEnabled == false || requireFieldMatch == false || weightMatchesUnsupported(query)) { getFlags(field).remove(HighlightFlag.WEIGHT_MATCHES); } fieldHighlighter = (CustomFieldHighlighter) getFieldHighlighter(field, query, extractTerms(query), maxPassages); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index d43f879c25660..c661935774029 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -103,6 +103,7 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { : HighlightUtils.Encoders.DEFAULT; int maxAnalyzedOffset = indexSettings.getHighlightMaxAnalyzedOffset(); + boolean weightMatchesEnabled = indexSettings.isWeightMatchesEnabled(); int numberOfFragments = fieldContext.field.fieldOptions().numberOfFragments(); Integer queryMaxAnalyzedOffset = fieldContext.field.fieldOptions().maxAnalyzedOffset(); Analyzer analyzer = wrapAnalyzer( @@ -135,7 +136,6 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { builder.withFieldMatcher(fieldMatcher(fieldContext)); builder.withFormatter(passageFormatter); return new CustomUnifiedHighlighter( - indexSettings.getSettings(), builder, offsetSource, fieldContext.field.fieldOptions().boundaryScannerLocale(), @@ -146,7 +146,8 @@ CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { highlighterNumberOfFragments, maxAnalyzedOffset, fieldContext.field.fieldOptions().maxAnalyzedOffset(), - fieldContext.field.fieldOptions().requireFieldMatch() + fieldContext.field.fieldOptions().requireFieldMatch(), + weightMatchesEnabled ); } diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index c27b7996112fc..841df9aa56289 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -140,7 +140,6 @@ private void assertHighlightOneDoc( builder.withFieldMatcher(name -> "text".equals(name)); builder.withFormatter(new CustomPassageFormatter("", "", new DefaultEncoder())); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter( - Settings.EMPTY, builder, offsetSource, locale, @@ -151,6 +150,7 @@ private void assertHighlightOneDoc( expectedPassages.length, maxAnalyzedOffset, queryMaxAnalyzedOffset, + true, true ); final Snippet[] snippets = highlighter.highlightField(getOnlyLeafReader(reader), topDocs.scoreDocs[0].doc, () -> rawValue); diff --git a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java index fadf0355fb7a8..2a2ac669bcc41 100644 --- a/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java +++ b/x-pack/plugin/ccr/src/main/java/org/elasticsearch/xpack/ccr/action/TransportResumeFollowAction.java @@ -471,6 +471,7 @@ static String[] extractLeaderShardHistoryUUIDs(Map ccrIndexMetad IndexSettings.MAX_REGEX_LENGTH_SETTING, IndexSettings.MAX_TERMS_COUNT_SETTING, IndexSettings.MAX_ANALYZED_OFFSET_SETTING, + IndexSettings.WEIGHT_MATCHES_MODE_ENABLED_SETTING, IndexSettings.MAX_DOCVALUE_FIELDS_SEARCH_SETTING, IndexSettings.MAX_TOKEN_COUNT_SETTING, IndexSettings.MAX_SLICES_PER_SCROLL, From 1780e565755e31d00396b66b8839d4e13421a5cd Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 13:25:44 +0900 Subject: [PATCH 10/15] fix unused import --- .../mapper/annotatedtext/AnnotatedTextHighlighterTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java index af68017c5bfea..5a121ffa53658 100644 --- a/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java +++ b/plugins/mapper-annotated-text/src/test/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextHighlighterTests.java @@ -34,7 +34,6 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedHighlighterAnalyzer; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotatedText; import org.elasticsearch.index.mapper.annotatedtext.AnnotatedTextFieldMapper.AnnotationAnalyzerWrapper; From 7833556a2c639fa6a64be8d45a6838d4c4213729 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 16:01:01 +0900 Subject: [PATCH 11/15] fix unused import --- .../lucene/search/uhighlight/CustomUnifiedHighlighterTests.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java index 841df9aa56289..5df6840640264 100644 --- a/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java +++ b/server/src/test/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighterTests.java @@ -37,7 +37,6 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.test.ESTestCase; import java.text.BreakIterator; From 9cd6e3ddcec77a8ea0fbf6c3c08efb46e9b0832d Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 18:06:56 +0900 Subject: [PATCH 12/15] address review comments --- .../common/settings/ClusterSettings.java | 1 - .../search/uhighlight/CustomUnifiedHighlighter.java | 4 +++- .../subphase/highlight/DefaultHighlighter.java | 13 ++++++++++--- 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java index bc5960e621ba3..ffd8ba1a4068b 100644 --- a/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java +++ b/server/src/main/java/org/elasticsearch/common/settings/ClusterSettings.java @@ -93,7 +93,6 @@ import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.indices.store.IndicesStore; import org.elasticsearch.ingest.IngestSettings; -import org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter; import org.elasticsearch.monitor.fs.FsHealthService; import org.elasticsearch.monitor.fs.FsService; import org.elasticsearch.monitor.jvm.JvmGcMonitorService; diff --git a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java index 6266b2b888303..3b857d4221523 100644 --- a/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java +++ b/server/src/main/java/org/elasticsearch/lucene/search/uhighlight/CustomUnifiedHighlighter.java @@ -28,7 +28,6 @@ import org.apache.lucene.util.automaton.ByteRunAutomaton; import org.elasticsearch.common.CheckedSupplier; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.Nullable; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.search.ESToParentBlockJoinQuery; @@ -258,6 +257,8 @@ public void consumeTerms(Query leafQuery, Term... terms) { if (leafQuery instanceof AbstractScriptFieldQuery) { /** * Queries on runtime fields don't support the matches API. + * TODO: We should add the support for keyword runtime fields. + * */ hasUnknownLeaf[0] = true; } @@ -269,6 +270,7 @@ public void consumeTermsMatching(Query leafQuery, String field, Supplier cache = (Map) fieldContext.cache.computeIfAbsent( + UnifiedHighlighter.class.getName(), + k -> new HashMap<>() + ); + if (cache.containsKey(fieldContext.fieldName) == false) { + cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); + } + CustomUnifiedHighlighter highlighter = cache.get(fieldContext.fieldName); IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings(); Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML From 84b4d595e60cece5638c1daed7d5344195ff9166 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 18:25:56 +0900 Subject: [PATCH 13/15] address review comments (bis) --- .../highlight/DefaultHighlighter.java | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 7cb6d7ef8d220..6ee6734919f6b 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -36,7 +36,11 @@ import java.io.IOException; import java.text.BreakIterator; -import java.util.*; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; import java.util.function.Predicate; import static org.elasticsearch.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; @@ -49,7 +53,15 @@ public boolean canHighlight(MappedFieldType fieldType) { @Override public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException { - CustomUnifiedHighlighter highlighter = buildHighlighter(fieldContext); + @SuppressWarnings("unchecked") + Map cache = (Map) fieldContext.cache.computeIfAbsent( + UnifiedHighlighter.class.getName(), + k -> new HashMap<>() + ); + if (cache.containsKey(fieldContext.fieldName) == false) { + cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); + } + CustomUnifiedHighlighter highlighter = cache.get(fieldContext.fieldName); MappedFieldType fieldType = fieldContext.fieldType; SearchHighlightContext.Field field = fieldContext.field; FetchSubPhase.HitContext hitContext = fieldContext.hitContext; @@ -95,15 +107,6 @@ public HighlightField highlight(FieldHighlightContext fieldContext) throws IOExc } CustomUnifiedHighlighter buildHighlighter(FieldHighlightContext fieldContext) { - @SuppressWarnings("unchecked") - Map cache = (Map) fieldContext.cache.computeIfAbsent( - UnifiedHighlighter.class.getName(), - k -> new HashMap<>() - ); - if (cache.containsKey(fieldContext.fieldName) == false) { - cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); - } - CustomUnifiedHighlighter highlighter = cache.get(fieldContext.fieldName); IndexSettings indexSettings = fieldContext.context.getSearchExecutionContext().getIndexSettings(); Encoder encoder = fieldContext.field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML From fabf6d67773644562cd8a3d1bc5cb4ee25cbaca3 Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 18:45:52 +0900 Subject: [PATCH 14/15] fix checkstyle --- .../main/java/org/elasticsearch/index/IndexSettings.java | 8 ++++---- .../fetch/subphase/highlight/DefaultHighlighter.java | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/IndexSettings.java b/server/src/main/java/org/elasticsearch/index/IndexSettings.java index 53389849cb613..74d68276c1e3f 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexSettings.java +++ b/server/src/main/java/org/elasticsearch/index/IndexSettings.java @@ -193,10 +193,10 @@ public final class IndexSettings { * mode of the unified highlighter. */ public static final Setting WEIGHT_MATCHES_MODE_ENABLED_SETTING = Setting.boolSetting( - "index.highlight.weight_matches_mode.enabled", - true, - Property.Dynamic, - Property.IndexScope + "index.highlight.weight_matches_mode.enabled", + true, + Property.Dynamic, + Property.IndexScope ); /** diff --git a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java index 6ee6734919f6b..d90aba24a94df 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/DefaultHighlighter.java @@ -55,8 +55,8 @@ public boolean canHighlight(MappedFieldType fieldType) { public HighlightField highlight(FieldHighlightContext fieldContext) throws IOException { @SuppressWarnings("unchecked") Map cache = (Map) fieldContext.cache.computeIfAbsent( - UnifiedHighlighter.class.getName(), - k -> new HashMap<>() + UnifiedHighlighter.class.getName(), + k -> new HashMap<>() ); if (cache.containsKey(fieldContext.fieldName) == false) { cache.put(fieldContext.fieldName, buildHighlighter(fieldContext)); From 174c5944016bf0bd2dceed972f46e12d84ca7cdd Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Tue, 8 Aug 2023 21:42:32 +0900 Subject: [PATCH 15/15] add changelog --- docs/changelog/96068.yaml | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 docs/changelog/96068.yaml diff --git a/docs/changelog/96068.yaml b/docs/changelog/96068.yaml new file mode 100644 index 0000000000000..9faf74414d36e --- /dev/null +++ b/docs/changelog/96068.yaml @@ -0,0 +1,5 @@ +pr: 96068 +summary: Use the Weight#matches mode for highlighting by default +area: Search +type: enhancement +issues: [] \ No newline at end of file