Skip to content

Commit 3c88dbf

Browse files
committed
Unified highlighter should respect no_match_size with number_of_fragments set to 0 (#41069)
The unified highlighter returns only the first sentence of the text when number_of_fragments is set to 0 (full highlighting) and there is nothing to highlight. This is a legacy of the removed postings highlighter, which was based on sentence breaks only. This commit changes the behavior so that the provided no_match_size value is respected when number_of_fragments is set to 0. As a result, the behavior is consistent for any value of the number_of_fragments option. Closes #41066
1 parent 3bee7e4 commit 3c88dbf

File tree

2 files changed

+5
-40
lines changed

2 files changed

+5
-40
lines changed

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

Lines changed: 0 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -139,8 +139,6 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
139139
"Failed to highlight field [" + highlighterContext.fieldName + "]", e);
140140
}
141141

142-
snippets = filterSnippets(snippets, field.fieldOptions().numberOfFragments());
143-
144142
if (field.fieldOptions().scoreOrdered()) {
145143
//let's sort the snippets by score if needed
146144
CollectionUtil.introSort(snippets, (o1, o2) -> Double.compare(o2.getScore(), o1.getScore()));
@@ -200,41 +198,6 @@ protected BreakIterator getBreakIterator(SearchContextHighlight.Field field) {
200198
}
201199
}
202200

203-
protected static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
204-
205-
//We need to filter the snippets as due to no_match_size we could have
206-
//either highlighted snippets or non highlighted ones and we don't want to mix those up
207-
List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
208-
for (Snippet snippet : snippets) {
209-
if (snippet.isHighlighted()) {
210-
filteredSnippets.add(snippet);
211-
}
212-
}
213-
214-
//if there's at least one highlighted snippet, we return all the highlighted ones
215-
//otherwise we return the first non highlighted one if available
216-
if (filteredSnippets.size() == 0) {
217-
if (snippets.size() > 0) {
218-
Snippet snippet = snippets.get(0);
219-
//if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0)
220-
//we need to return the first sentence of the content rather than the whole content
221-
if (numberOfFragments == 0) {
222-
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
223-
String text = snippet.getText();
224-
bi.setText(text);
225-
int next = bi.next();
226-
if (next != BreakIterator.DONE) {
227-
String newText = text.substring(0, next).trim();
228-
snippet = new Snippet(newText, snippet.getScore(), snippet.isHighlighted());
229-
}
230-
}
231-
filteredSnippets.add(snippet);
232-
}
233-
}
234-
235-
return filteredSnippets;
236-
}
237-
238201
protected static String convertFieldValue(MappedFieldType type, Object value) {
239202
if (value instanceof BytesRef) {
240203
return type.valueForDisplay(value).toString();

server/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1827,9 +1827,11 @@ public void testHighlightNoMatchSize() throws IOException {
18271827
assertHighlight(response, 0, "text", 0, 1, equalTo("I am pretty long so some"));
18281828

18291829
// We can also ask for a fragment longer than the input string and get the whole string
1830-
field.highlighterType("plain").noMatchSize(text.length() * 2);
1831-
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
1832-
assertHighlight(response, 0, "text", 0, 1, equalTo(text));
1830+
for (String type : new String[] { "plain", "unified" }) {
1831+
field.highlighterType(type).noMatchSize(text.length() * 2).numOfFragments(0);
1832+
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();
1833+
assertHighlight(response, 0, "text", 0, 1, equalTo(text));
1834+
}
18331835

18341836
field.highlighterType("fvh");
18351837
response = client().prepareSearch("test").highlighter(new HighlightBuilder().field(field)).get();

0 commit comments

Comments
 (0)