3333import org .apache .lucene .search .spans .SpanOrQuery ;
3434import org .apache .lucene .search .spans .SpanQuery ;
3535import org .apache .lucene .search .spans .SpanTermQuery ;
36+ import org .apache .lucene .util .BytesRef ;
37+ import org .apache .lucene .util .automaton .CharacterRunAutomaton ;
3638import org .elasticsearch .common .Nullable ;
3739import org .elasticsearch .common .lucene .all .AllTermQuery ;
3840import org .elasticsearch .common .lucene .search .MultiPhrasePrefixQuery ;
4749import java .util .List ;
4850import java .util .Locale ;
4951import java .util .Map ;
52+ import java .util .Set ;
5053
5154/**
5255 * Subclass of the {@link UnifiedHighlighter} that works for a single field in a single document.
5760 * Supports both returning empty snippets and non highlighted snippets when no highlighting can be performed.
5861 */
5962public class CustomUnifiedHighlighter extends UnifiedHighlighter {
63+ public static final char MULTIVAL_SEP_CHAR = (char ) 0 ;
6064 private static final Snippet [] EMPTY_SNIPPET = new Snippet [0 ];
6165
6266 private final String fieldValue ;
6367 private final PassageFormatter passageFormatter ;
6468 private final BreakIterator breakIterator ;
65- private final boolean returnNonHighlightedSnippets ;
69+ private final Locale breakIteratorLocale ;
70+ private final int noMatchSize ;
6671
/**
 * Creates a new instance of {@link CustomUnifiedHighlighter}, which highlights a single field value
 * of a single document.
 *
 * @param searcher            the {@link IndexSearcher} handed to the parent {@link UnifiedHighlighter}
 * @param analyzer            the analyzer used for the field at index time, used for multi term queries internally
 * @param passageFormatter    our own {@link CustomPassageFormatter}
 *                            which generates snippets in forms of {@link Snippet} objects
 * @param breakIteratorLocale the {@link Locale} to use for dividing text into passages.
 *                            If null {@link Locale#ROOT} is used
 * @param breakIterator       the {@link BreakIterator} to use for dividing text into passages.
 *                            Declared nullable; the constructor stores the reference as given and
 *                            does not substitute a default itself
 * @param fieldValue          the original field values delimited by MULTIVAL_SEP_CHAR
 * @param noMatchSize         the size of the text that should be returned when no highlighting can be performed
 */
public CustomUnifiedHighlighter(IndexSearcher searcher,
                                Analyzer analyzer,
                                PassageFormatter passageFormatter,
                                @Nullable Locale breakIteratorLocale,
                                @Nullable BreakIterator breakIterator,
                                String fieldValue,
                                int noMatchSize) {
    super(searcher, analyzer);
    // Fall back to the root locale when the caller did not choose one.
    if (breakIteratorLocale != null) {
        this.breakIteratorLocale = breakIteratorLocale;
    } else {
        this.breakIteratorLocale = Locale.ROOT;
    }
    this.breakIterator = breakIterator;
    this.passageFormatter = passageFormatter;
    this.fieldValue = fieldValue;
    this.noMatchSize = noMatchSize;
}
9299
93100 /**
@@ -111,16 +118,13 @@ public Snippet[] highlightField(String field, Query query, int docId, int maxPas
111118 @ Override
112119 protected List <CharSequence []> loadFieldValues (String [] fields , DocIdSetIterator docIter ,
113120 int cacheCharsThreshold ) throws IOException {
114- //we only highlight one field, one document at a time
121+ // we only highlight one field, one document at a time
115122 return Collections .singletonList (new String []{fieldValue });
116123 }
117124
118125 @ Override
119126 protected BreakIterator getBreakIterator (String field ) {
120- if (breakIterator != null ) {
121- return breakIterator ;
122- }
123- return super .getBreakIterator (field );
127+ return breakIterator ;
124128 }
125129
126130 @ Override
@@ -129,11 +133,18 @@ protected PassageFormatter getFormatter(String field) {
129133 }
130134
131135 @ Override
132- protected int getMaxNoHighlightPassages (String field ) {
133- if (returnNonHighlightedSnippets ) {
134- return 1 ;
135- }
136- return 0 ;
136+ protected FieldHighlighter getFieldHighlighter (String field , Query query , Set <Term > allTerms , int maxPassages ) {
137+ BytesRef [] terms = filterExtractedTerms (getFieldMatcher (field ), allTerms );
138+ Set <HighlightFlag > highlightFlags = getFlags (field );
139+ PhraseHelper phraseHelper = getPhraseHelper (field , query , highlightFlags );
140+ CharacterRunAutomaton [] automata = getAutomata (field , query , highlightFlags );
141+ OffsetSource offsetSource = getOptimizedOffsetSource (field , terms , phraseHelper , automata );
142+ BreakIterator breakIterator = new SplittingBreakIterator (getBreakIterator (field ),
143+ UnifiedHighlighter .MULTIVAL_SEP_CHAR );
144+ FieldOffsetStrategy strategy =
145+ getOffsetStrategy (offsetSource , field , terms , phraseHelper , automata , highlightFlags );
146+ return new CustomFieldHighlighter (field , strategy , breakIteratorLocale , breakIterator ,
147+ getScorer (field ), maxPassages , (noMatchSize > 0 ? 1 : 0 ), getFormatter (field ), noMatchSize , fieldValue );
137148 }
138149
139150 @ Override
@@ -146,7 +157,6 @@ protected Collection<Query> preSpanQueryRewrite(Query query) {
146157 return rewriteCustomQuery (query );
147158 }
148159
149-
150160 /**
151161 * Translate custom queries into queries that are supported by the unified highlighter.
152162 */
0 commit comments