Skip to content

Commit 8efe5dc

Browse files
lukas-vlcek authored and kimchy committed
Highlighter enhancements.
1 parent c050bb5 commit 8efe5dc

File tree

4 files changed

+219
-72
lines changed

4 files changed

+219
-72
lines changed
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
package org.apache.lucene.search.vectorhighlight;
2+
3+
import java.util.ArrayList;
4+
import java.util.Iterator;
5+
import java.util.List;
6+
7+
/**
8+
* Copy from lucene trunk:
9+
* http://svn.apache.org/viewvc/lucene/dev/trunk/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/SingleFragListBuilder.java
10+
* This class in not available in 3.0.2 release yet.
11+
*/
12+
public class SingleFragListBuilder implements FragListBuilder {
13+
14+
@Override public FieldFragList createFieldFragList(FieldPhraseList fieldPhraseList, int fragCharSize) {
15+
FieldFragList ffl = new FieldFragList(fragCharSize);
16+
17+
List<FieldPhraseList.WeightedPhraseInfo> wpil = new ArrayList<FieldPhraseList.WeightedPhraseInfo>();
18+
Iterator<FieldPhraseList.WeightedPhraseInfo> ite = fieldPhraseList.phraseList.iterator();
19+
FieldPhraseList.WeightedPhraseInfo phraseInfo = null;
20+
while (true) {
21+
if (!ite.hasNext()) break;
22+
phraseInfo = ite.next();
23+
if (phraseInfo == null) break;
24+
25+
wpil.add(phraseInfo);
26+
}
27+
if (wpil.size() > 0)
28+
ffl.add(0, Integer.MAX_VALUE, wpil);
29+
return ffl;
30+
}
31+
}

modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,12 @@
1919

2020
package org.elasticsearch.search.highlight;
2121

22+
import org.apache.lucene.index.IndexReader;
23+
import org.apache.lucene.search.Query;
2224
import org.apache.lucene.search.vectorhighlight.*;
2325
import org.elasticsearch.ElasticSearchException;
2426
import org.elasticsearch.common.collect.ImmutableMap;
27+
import org.elasticsearch.common.collect.Tuple;
2528
import org.elasticsearch.index.mapper.DocumentMapper;
2629
import org.elasticsearch.index.mapper.FieldMapper;
2730
import org.elasticsearch.search.SearchHit;
@@ -32,9 +35,11 @@
3235
import org.elasticsearch.search.internal.SearchContext;
3336

3437
import java.io.IOException;
35-
import java.util.HashMap;
38+
import java.util.Arrays;
3639
import java.util.Map;
3740

41+
import static org.elasticsearch.common.collect.Maps.newHashMap;
42+
3843
/**
3944
* @author kimchy (shay.banon)
4045
*/
@@ -52,35 +57,30 @@ public class HighlightPhase implements SearchPhase {
5257
return;
5358
}
5459

55-
FragListBuilder fragListBuilder = new SimpleFragListBuilder();
56-
FragmentsBuilder fragmentsBuilder;
57-
if (context.highlight().scoreOrdered()) {
58-
fragmentsBuilder = new ScoreOrderFragmentsBuilder(context.highlight().preTags(), context.highlight().postTags());
59-
} else {
60-
fragmentsBuilder = new SimpleFragmentsBuilder(context.highlight().preTags(), context.highlight().postTags());
61-
}
62-
FastVectorHighlighter highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
63-
64-
CustomFieldQuery.reader.set(context.searcher().getIndexReader());
65-
CustomFieldQuery.highlightFilters.set(context.highlight().highlightFilter());
60+
Map<Integer,FastVectorHighlighter> highlighterMap = newHashMap();
61+
Map<Integer,FieldQuery> fieldQueryMap = newHashMap();
6662

67-
FieldQuery fieldQuery = new CustomFieldQuery(context.query(), highlighter);
6863
for (SearchHit hit : context.fetchResult().hits().hits()) {
6964
InternalSearchHit internalHit = (InternalSearchHit) hit;
7065

7166
DocumentMapper documentMapper = context.mapperService().type(internalHit.type());
7267
int docId = internalHit.docId();
7368

74-
Map<String, HighlightField> highlightFields = new HashMap<String, HighlightField>();
69+
Map<String, HighlightField> highlightFields = newHashMap();
7570
for (SearchContextHighlight.ParsedHighlightField parsedHighlightField : context.highlight().fields()) {
76-
String indexName = parsedHighlightField.field();
71+
String fieldName = parsedHighlightField.field();
7772
FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(parsedHighlightField.field());
7873
if (mapper != null) {
79-
indexName = mapper.names().indexName();
74+
fieldName = mapper.names().indexName();
8075
}
76+
77+
Tuple<Integer,FastVectorHighlighter> highlighterTuple = getHighlighter(highlighterMap, parsedHighlightField.settings());
78+
FastVectorHighlighter highlighter = highlighterTuple.v2();
79+
FieldQuery fieldQuery = getFieldQuery(highlighterTuple.v1(), fieldQueryMap, highlighter, context.query(), context.searcher().getIndexReader(), parsedHighlightField.settings());
80+
8181
String[] fragments;
8282
try {
83-
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, indexName, parsedHighlightField.fragmentCharSize(), parsedHighlightField.numberOfFragments());
83+
fragments = highlighter.getBestFragments(fieldQuery, context.searcher().getIndexReader(), docId, fieldName, parsedHighlightField.settings().fragmentCharSize(), parsedHighlightField.settings().numberOfFragments());
8484
} catch (IOException e) {
8585
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + parsedHighlightField.field() + "]", e);
8686
}
@@ -91,4 +91,44 @@ public class HighlightPhase implements SearchPhase {
9191
internalHit.highlightFields(highlightFields);
9292
}
9393
}
94+
95+
private FieldQuery getFieldQuery(int key, Map<Integer,FieldQuery> fieldQueryMap, FastVectorHighlighter highlighter, Query query, IndexReader indexReader, SearchContextHighlight.ParsedHighlightSettings settings) {
96+
FieldQuery fq = fieldQueryMap.get(key);
97+
if (fq == null) {
98+
CustomFieldQuery.reader.set(indexReader);
99+
CustomFieldQuery.highlightFilters.set(settings.highlightFilter());
100+
fq = new CustomFieldQuery(query, highlighter);
101+
fieldQueryMap.put(key,fq);
102+
}
103+
return fq;
104+
}
105+
106+
private Tuple<Integer, FastVectorHighlighter> getHighlighter(Map<Integer,FastVectorHighlighter> highlighterMap, SearchContextHighlight.ParsedHighlightSettings settings) {
107+
108+
FragListBuilder fragListBuilder;
109+
FragmentsBuilder fragmentsBuilder;
110+
if (!settings.fragmentsAllowed()) {
111+
fragListBuilder = new SingleFragListBuilder();
112+
fragmentsBuilder = new SimpleFragmentsBuilder(settings.preTags(), settings.postTags());
113+
} else {
114+
fragListBuilder = new SimpleFragListBuilder();
115+
if (settings.scoreOrdered()) {
116+
fragmentsBuilder = new ScoreOrderFragmentsBuilder(settings.preTags(), settings.postTags());
117+
} else {
118+
fragmentsBuilder = new SimpleFragmentsBuilder(settings.preTags(), settings.postTags());
119+
}
120+
}
121+
122+
// highlighter key is determined by tags and FragList and Fragment builder classes.
123+
String[] mask = Arrays.copyOf(settings.preTags(), settings.preTags().length + settings.postTags().length);
124+
System.arraycopy(settings.postTags(), 0, mask, settings.preTags().length, settings.postTags().length);
125+
int key = (Arrays.toString(mask)+fragListBuilder.getClass().getSimpleName()+fragmentsBuilder.getClass().getSimpleName()).hashCode();
126+
127+
FastVectorHighlighter highlighter = highlighterMap.get(key);
128+
if (highlighter == null) {
129+
highlighter = new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
130+
highlighterMap.put(key,highlighter);
131+
}
132+
return Tuple.tuple(key, highlighter);
133+
}
94134
}

modules/elasticsearch/src/main/java/org/elasticsearch/search/highlight/HighlighterParseElement.java

Lines changed: 70 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,15 @@
3333
* <pre>
3434
* highlight : {
3535
* tags_schema : "styled",
36-
* pre_Tags : ["tag1", "tag2"],
36+
* pre_tags : ["tag1", "tag2"],
3737
* post_tags : ["tag1", "tag2"],
3838
* order : "score",
39+
* highlight_filter : true,
3940
* fields : {
40-
* field1 : { }
41-
* field2 : { fragment_size : 100, num_of_fragments : 2 }
41+
* field1 : { },
42+
* field2 : { fragment_size : 100, number_of_fragments : 2 },
43+
* field3 : { number_of_fragments : 5, order : "simple", tags_schema : "styled" },
44+
* field4 : { fragment_type : "content", pre_tags : ["openingTagA", "openingTagB"], post_tags : ["closingTag"] }
4245
* }
4346
* }
4447
* </pre>
@@ -56,17 +59,21 @@ public class HighlighterParseElement implements SearchParseElement {
5659
"<em class=\"hlt7\">", "<em class=\"hlt8\">", "<em class=\"hlt9\">",
5760
"<em class=\"hlt10\">"
5861
};
59-
public static final String[] STYLED_POST_TAGS = {"</em>"};
60-
62+
private static final String[] STYLED_POST_TAGS = {"</em>"};
6163

6264
@Override public void parse(XContentParser parser, SearchContext context) throws Exception {
6365
XContentParser.Token token;
6466
String topLevelFieldName = null;
6567
List<SearchContextHighlight.ParsedHighlightField> fields = newArrayList();
66-
String[] preTags = DEFAULT_PRE_TAGS;
67-
String[] postTags = DEFAULT_POST_TAGS;
68-
boolean scoreOrdered = false;
69-
boolean highlightFilter = true;
68+
69+
String[] globalPreTags = DEFAULT_PRE_TAGS;
70+
String[] globalPostTags = DEFAULT_POST_TAGS;
71+
boolean globalScoreOrdered = false;
72+
boolean globalHighlightFilter = true;
73+
int globalFragmentSize = 100;
74+
int globalNumOfFragments = 5;
75+
boolean globalFragmentsAllowed = true;
76+
7077
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
7178
if (token == XContentParser.Token.FIELD_NAME) {
7279
topLevelFieldName = parser.currentName();
@@ -76,25 +83,31 @@ public class HighlighterParseElement implements SearchParseElement {
7683
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
7784
preTagsList.add(parser.text());
7885
}
79-
preTags = preTagsList.toArray(new String[preTagsList.size()]);
86+
globalPreTags = preTagsList.toArray(new String[preTagsList.size()]);
8087
} else if ("post_tags".equals(topLevelFieldName) || "postTags".equals(topLevelFieldName)) {
8188
List<String> postTagsList = Lists.newArrayList();
8289
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
8390
postTagsList.add(parser.text());
8491
}
85-
postTags = postTagsList.toArray(new String[postTagsList.size()]);
92+
globalPostTags = postTagsList.toArray(new String[postTagsList.size()]);
8693
}
8794
} else if (token.isValue()) {
8895
if ("order".equals(topLevelFieldName)) {
89-
scoreOrdered = "score".equals(parser.text());
96+
globalScoreOrdered = "score".equals(parser.text());
9097
} else if ("tags_schema".equals(topLevelFieldName) || "tagsSchema".equals(topLevelFieldName)) {
9198
String schema = parser.text();
9299
if ("styled".equals(schema)) {
93-
preTags = STYLED_PRE_TAG;
94-
postTags = STYLED_POST_TAGS;
100+
globalPreTags = STYLED_PRE_TAG;
101+
globalPostTags = STYLED_POST_TAGS;
95102
}
96103
} else if ("highlight_filter".equals(topLevelFieldName) || "highlightFilter".equals(topLevelFieldName)) {
97-
highlightFilter = parser.booleanValue();
104+
globalHighlightFilter = parser.booleanValue();
105+
} else if ("fragment_size".equals(topLevelFieldName) || "fragmentSize".equals(topLevelFieldName)) {
106+
globalFragmentSize = parser.intValue();
107+
} else if ("number_of_fragments".equals(topLevelFieldName) || "numberOfFragments".equals(topLevelFieldName)) {
108+
globalNumOfFragments = parser.intValue();
109+
} else if ("fragment_type".equals(topLevelFieldName) || "fragmentType".equals(topLevelFieldName)) {
110+
globalFragmentsAllowed = !("content".equals(parser.text()));
98111
}
99112
} else if (token == XContentParser.Token.START_OBJECT) {
100113
if ("fields".equals(topLevelFieldName)) {
@@ -104,28 +117,64 @@ public class HighlighterParseElement implements SearchParseElement {
104117
highlightFieldName = parser.currentName();
105118
} else if (token == XContentParser.Token.START_OBJECT) {
106119
String fieldName = null;
107-
int fragmentSize = 100;
108-
int numOfFragments = 5;
120+
121+
int fragmentSize = globalFragmentSize;
122+
int numOfFragments = globalNumOfFragments;
123+
boolean highlightFilter = globalHighlightFilter;
124+
boolean scoreOrdered = globalScoreOrdered;
125+
boolean fragmentsAllowed = globalFragmentsAllowed;
126+
String[] preTags = globalPreTags;
127+
String[] postTags = globalPostTags;
128+
109129
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
110130
if (token == XContentParser.Token.FIELD_NAME) {
111131
fieldName = parser.currentName();
132+
} else if (token == XContentParser.Token.START_ARRAY) {
133+
if ("pre_tags".equals(fieldName) || "preTags".equals(fieldName)) {
134+
List<String> preTagsList = Lists.newArrayList();
135+
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
136+
preTagsList.add(parser.text());
137+
}
138+
preTags = preTagsList.toArray(new String[preTagsList.size()]);
139+
} else if ("post_tags".equals(fieldName) || "postTags".equals(fieldName)) {
140+
List<String> postTagsList = Lists.newArrayList();
141+
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
142+
postTagsList.add(parser.text());
143+
}
144+
postTags = postTagsList.toArray(new String[postTagsList.size()]);
145+
}
112146
} else if (token.isValue()) {
113147
if ("fragment_size".equals(fieldName) || "fragmentSize".equals(fieldName)) {
114148
fragmentSize = parser.intValue();
115149
} else if ("number_of_fragments".equals(fieldName) || "numberOfFragments".equals(fieldName)) {
116150
numOfFragments = parser.intValue();
151+
} else if ("highlight_filter".equals(fieldName) || "highlightFilter".equals(fieldName)) {
152+
highlightFilter = parser.booleanValue();
153+
} else if ("order".equals(fieldName)) {
// Per-field ordering is configured with the "order" key, like the global setting;
// only the value "score" enables score ordering.
scoreOrdered = "score".equals(parser.text());
155+
} else if ("fragment_type".equals(fieldName) || "fragmentType".equals(fieldName)) {
156+
fragmentsAllowed = !("content".equals(parser.text()));
117157
}
118158
}
119159
}
120-
fields.add(new SearchContextHighlight.ParsedHighlightField(highlightFieldName, fragmentSize, numOfFragments));
160+
fields.add(new SearchContextHighlight.ParsedHighlightField(
161+
highlightFieldName,
162+
new SearchContextHighlight.ParsedHighlightSettings(
163+
fragmentSize, numOfFragments, preTags, postTags,
164+
scoreOrdered, highlightFilter, fragmentsAllowed)));
121165
}
122166
}
123167
}
124168
}
125169
}
126-
if (preTags != null && postTags == null) {
127-
throw new SearchParseException(context, "Highlighter preTags are set, but postTags are not set");
170+
if (globalPreTags != null && globalPostTags == null) {
171+
throw new SearchParseException(context, "Highlighter global preTags are set, but global postTags are not set");
128172
}
129-
context.highlight(new SearchContextHighlight(fields, preTags, postTags, scoreOrdered, highlightFilter));
173+
context.highlight(new SearchContextHighlight(
174+
fields,
175+
new SearchContextHighlight.ParsedHighlightSettings(
176+
globalFragmentSize, globalNumOfFragments, globalPreTags, globalPostTags,
177+
globalScoreOrdered, globalHighlightFilter, globalFragmentsAllowed))
178+
);
130179
}
131180
}

0 commit comments

Comments
 (0)