Skip to content

Commit a906f8a

Browse files
Highlighters skip ignored keyword values (#53408) (#53604)
Keyword field values with length more than ignore_above are not indexed. But highlighters still were retrieving these values from _source and were trying to highlight them. This sometimes lead to errors if a field length exceeded max_analyzed_offset. But also this is an overall wrong behaviour to attempt to highlight something that was ignored during indexing. This PR checks if a keyword value was ignored because of its length, and if yes, skips highlighting it. Backport: #53408 Closes #43800
1 parent 376b2ae commit a906f8a

File tree

5 files changed

+101
-7
lines changed

5 files changed

+101
-7
lines changed

docs/reference/migration/migrate_7_7.asciidoc

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,3 +73,13 @@ The listener thread pool is no longer used internally by Elasticsearch.
7373
Therefore, these settings have been deprecated. You can safely remove these
7474
settings from the configuration of your nodes.
7575

76+
[discrete]
77+
[[breaking_77_highlighters_changes]]
78+
=== Highlighters changes
79+
80+
[discrete]
81+
==== Ignored keyword values are no longer highlighted
82+
If a keyword value was ignored during indexing because of its length
83+
(`ignore_above` parameter was applied), {es} doesn't attempt to
84+
highlight it anymore, which means no highlights are produced for
85+
ignored values.
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
---
2+
setup:
3+
- do:
4+
indices.create:
5+
index: test-index
6+
body:
7+
mappings:
8+
"properties":
9+
"k1":
10+
"type": "keyword"
11+
"k2":
12+
"type": "keyword"
13+
"ignore_above": 3
14+
- do:
15+
bulk:
16+
index: test-index
17+
refresh: true
18+
body:
19+
- '{"index": {"_id": "1"}}'
20+
- '{"k1": "123", "k2" : "123"}'
21+
- '{"index": {"_id": "2"}}'
22+
- '{"k1": "1234", "k2" : "1234"}'
23+
24+
---
25+
"Plain Highligher should skip highlighting ignored keyword values":
26+
- skip:
27+
version: " - 7.6.99"
28+
reason: "skip highlighting of ignored values was introduced in 7.7"
29+
- do:
30+
search:
31+
index: test-index
32+
body:
33+
query:
34+
prefix:
35+
k1: "12"
36+
highlight:
37+
require_field_match: false
38+
fields:
39+
k2:
40+
type: plain
41+
42+
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
43+
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored
44+
45+
---
46+
"Unified Highligher should skip highlighting ignored keyword values":
47+
- skip:
48+
version: " - 7.6.99"
49+
reason: "skip highlighting of ignored values was introduced in 7.7"
50+
- do:
51+
search:
52+
index: test-index
53+
body:
54+
query:
55+
prefix:
56+
k1: "12"
57+
highlight:
58+
require_field_match: false
59+
fields:
60+
k2:
61+
type: unified
62+
63+
- match: {hits.hits.0.highlight.k2.0: "<em>123</em>"}
64+
- is_false: hits.hits.1.highlight # no highlight for a value that was ignored

server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -314,8 +314,7 @@ protected KeywordFieldMapper(String simpleName, MappedFieldType fieldType, Mappe
314314

315315
/** Values that have more chars than the return value of this method will
316316
* be skipped at parsing time. */
317-
// pkg-private for testing
318-
int ignoreAbove() {
317+
public int ignoreAbove() {
319318
return ignoreAbove;
320319
}
321320

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.ExceptionsHelper;
3737
import org.elasticsearch.common.text.Text;
3838
import org.elasticsearch.index.IndexSettings;
39+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3940
import org.elasticsearch.index.mapper.MappedFieldType;
4041
import org.elasticsearch.index.query.QueryShardContext;
4142
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -102,6 +103,12 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
102103
ArrayList<TextFragment> fragsList = new ArrayList<>();
103104
List<Object> textsToHighlight;
104105
Analyzer analyzer = context.getMapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
106+
Integer keywordIgnoreAbove = null;
107+
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
108+
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
109+
.mappers().getMapper(highlighterContext.fieldName);
110+
keywordIgnoreAbove = mapper.ignoreAbove();
111+
};
105112
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
106113

107114
try {
@@ -110,7 +117,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
110117

111118
for (Object textToHighlight : textsToHighlight) {
112119
String text = convertFieldValue(fieldType, textToHighlight);
113-
if (text.length() > maxAnalyzedOffset) {
120+
int textLength = text.length();
121+
if (keywordIgnoreAbove != null && textLength > keywordIgnoreAbove) {
122+
continue; // skip highlighting keyword terms that were ignored during indexing
123+
}
124+
if (textLength > maxAnalyzedOffset) {
114125
throw new IllegalArgumentException(
115126
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
116127
"] doc of [" + context.index().getName() + "] index " +

server/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/UnifiedHighlighter.java

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
import org.elasticsearch.index.IndexSettings;
3737
import org.elasticsearch.index.mapper.DocumentMapper;
3838
import org.elasticsearch.index.mapper.IdFieldMapper;
39+
import org.elasticsearch.index.mapper.KeywordFieldMapper;
3940
import org.elasticsearch.index.mapper.MappedFieldType;
4041
import org.elasticsearch.index.query.QueryShardContext;
4142
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@@ -56,7 +57,7 @@ public class UnifiedHighlighter implements Highlighter {
5657
public boolean canHighlight(MappedFieldType fieldType) {
5758
return true;
5859
}
59-
60+
6061
@Override
6162
public HighlightField highlight(HighlighterContext highlighterContext) {
6263
MappedFieldType fieldType = highlighterContext.fieldType;
@@ -65,11 +66,16 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
6566
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
6667
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
6768
final int maxAnalyzedOffset = context.getIndexSettings().getHighlightMaxAnalyzedOffset();
69+
Integer keywordIgnoreAbove = null;
70+
if (fieldType instanceof KeywordFieldMapper.KeywordFieldType) {
71+
KeywordFieldMapper mapper = (KeywordFieldMapper) context.getMapperService().documentMapper()
72+
.mappers().getMapper(highlighterContext.fieldName);
73+
keywordIgnoreAbove = mapper.ignoreAbove();
74+
}
6875

6976
List<Snippet> snippets = new ArrayList<>();
7077
int numberOfFragments = field.fieldOptions().numberOfFragments();
7178
try {
72-
7379
final Analyzer analyzer = getAnalyzer(context.getMapperService().documentMapper(hitContext.hit().getType()),
7480
hitContext);
7581
List<Object> fieldValues = loadFieldValues(fieldType, field, context, hitContext,
@@ -82,7 +88,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) {
8288
final CustomUnifiedHighlighter highlighter;
8389
final String fieldValue = mergeFieldValues(fieldValues, MULTIVAL_SEP_CHAR);
8490
final OffsetSource offsetSource = getOffsetSource(fieldType);
85-
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValue.length() > maxAnalyzedOffset)) {
91+
int fieldValueLength = fieldValue.length();
92+
if (keywordIgnoreAbove != null && fieldValueLength > keywordIgnoreAbove) {
93+
return null; // skip highlighting keyword terms that were ignored during indexing
94+
}
95+
if ((offsetSource == OffsetSource.ANALYSIS) && (fieldValueLength > maxAnalyzedOffset)) {
8696
throw new IllegalArgumentException(
8797
"The length of [" + highlighterContext.fieldName + "] field of [" + hitContext.hit().getId() +
8898
"] doc of [" + context.index().getName() + "] index " + "has exceeded [" +
@@ -152,7 +162,7 @@ protected PassageFormatter getPassageFormatter(HitContext hitContext, SearchCont
152162
return passageFormatter;
153163
}
154164

155-
165+
156166
protected Analyzer getAnalyzer(DocumentMapper docMapper, HitContext hitContext) {
157167
return docMapper.mappers().indexAnalyzer();
158168
}

0 commit comments

Comments
 (0)