From 89ce6c209f0e67941c9eeeaea0e61b9178d2b2db Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 18 Nov 2016 09:42:42 +0100 Subject: [PATCH 1/2] Fix highlighting on a stored keyword field The highlighter converts stored keyword fields using toString(). Since the keyword fields are stored as utf8 bytes the conversion is broken. This change uses BytesRef.utf8toString() to convert the field value in a valid string. Fixes #21636 --- .../subphase/highlight/PlainHighlighter.java | 10 ++++++-- .../highlight/HighlighterSearchIT.java | 25 +++++++++++++++++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index e821b0fd9a82b..4ea0b15720037 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -31,6 +31,7 @@ import org.apache.lucene.search.highlight.SimpleHTMLFormatter; import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.TextFragment; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefHash; import org.apache.lucene.util.CollectionUtil; import org.elasticsearch.ExceptionsHelper; @@ -104,9 +105,14 @@ public HighlightField highlight(HighlighterContext highlighterContext) { try { textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext); - for (Object textToHighlight : textsToHighlight) { - String text = textToHighlight.toString(); + String text; + if (textToHighlight instanceof BytesRef) { + // keywords are internally stored as utf8 bytes + text = ((BytesRef) textToHighlight).utf8ToString(); + } else { + text = textToHighlight.toString(); + } try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) { if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) { diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index ed63ea1ea1c46..8f8887bd150b4 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -24,7 +24,6 @@ import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.action.search.SearchResponse; -import org.elasticsearch.action.search.SearchType; import org.elasticsearch.action.support.WriteRequest; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.geo.GeoPoint; @@ -41,7 +40,6 @@ import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; import org.elasticsearch.index.search.MatchQuery; -import org.elasticsearch.indices.IndicesRequestCache; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.search.SearchHit; @@ -106,6 +104,29 @@ protected Collection> nodePlugins() { return Collections.singletonList(InternalSettingsPlugin.class); } + public void testHighlightingWithStoredKeyword() throws IOException { + XContentBuilder mappings = jsonBuilder(); + mappings.startObject(); + mappings.startObject("type") + .startObject("properties") + .startObject("text") + .field("type", "keyword") + .field("store", true) + .endObject() + .endObject() + .endObject(); + mappings.endObject(); + assertAcked(prepareCreate("test") + .addMapping("type", mappings)); + client().prepareIndex("test", "type", "1") + .setSource(jsonBuilder().startObject().field("text", "foo").endObject()) + .get(); + refresh(); + SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "foo")) + .highlighter(new HighlightBuilder().field(new Field("text"))).get(); + assertHighlight(search, 0, "text", 0, equalTo("foo")); + } + public void testHighlightingWithWildcardName() throws IOException { // test the kibana case with * as fieldname that will try highlight all fields including meta fields XContentBuilder mappings = jsonBuilder(); From b58c131e07484c3361f1ec85f0e89b2a6326173a Mon Sep 17 00:00:00 2001 From: Jim Ferenczi Date: Fri, 18 Nov 2016 12:06:56 +0100 Subject: [PATCH 2/2] Replace BytesRef#utf8ToString with MappedFieldType#valueForDisplay --- .../search/fetch/subphase/highlight/PlainHighlighter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java index 4ea0b15720037..127a008f9cc1d 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/PlainHighlighter.java @@ -105,11 +105,11 @@ public HighlightField highlight(HighlighterContext highlighterContext) { try { textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext); + for (Object textToHighlight : textsToHighlight) { String text; if (textToHighlight instanceof BytesRef) { - // keywords are internally stored as utf8 bytes - text = ((BytesRef) textToHighlight).utf8ToString(); + text = mapper.fieldType().valueForDisplay(textToHighlight).toString(); } else { text = textToHighlight.toString(); }