2121
2222import org .apache .lucene .search .highlight .SimpleFragmenter ;
2323import org .apache .lucene .search .highlight .SimpleSpanFragmenter ;
24+ import org .elasticsearch .Version ;
2425import org .elasticsearch .action .support .ToXContentToBytes ;
2526import org .elasticsearch .common .ParseField ;
2627import org .elasticsearch .common .ParsingException ;
3233import org .elasticsearch .common .xcontent .XContentParser ;
3334import org .elasticsearch .index .query .QueryBuilder ;
3435import org .elasticsearch .index .query .QueryParseContext ;
36+ import org .elasticsearch .search .fetch .subphase .highlight .HighlightBuilder .BoundaryScannerType ;
3537import org .elasticsearch .search .fetch .subphase .highlight .HighlightBuilder .Order ;
3638
3739import java .io .IOException ;
3840import java .util .Arrays ;
41+ import java .util .Locale ;
3942import java .util .Map ;
4043import java .util .Objects ;
4144import java .util .function .BiFunction ;
@@ -57,8 +60,10 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
5760 public static final ParseField NUMBER_OF_FRAGMENTS_FIELD = new ParseField ("number_of_fragments" );
5861 public static final ParseField ENCODER_FIELD = new ParseField ("encoder" );
5962 public static final ParseField REQUIRE_FIELD_MATCH_FIELD = new ParseField ("require_field_match" );
63+ public static final ParseField BOUNDARY_SCANNER_FIELD = new ParseField ("boundary_scanner" );
6064 public static final ParseField BOUNDARY_MAX_SCAN_FIELD = new ParseField ("boundary_max_scan" );
6165 public static final ParseField BOUNDARY_CHARS_FIELD = new ParseField ("boundary_chars" );
66+ public static final ParseField BOUNDARY_SCANNER_LOCALE_FIELD = new ParseField ("boundary_scanner_locale" );
6267 public static final ParseField TYPE_FIELD = new ParseField ("type" );
6368 public static final ParseField FRAGMENTER_FIELD = new ParseField ("fragmenter" );
6469 public static final ParseField NO_MATCH_SIZE_FIELD = new ParseField ("no_match_size" );
@@ -88,10 +93,14 @@ public abstract class AbstractHighlighterBuilder<HB extends AbstractHighlighterB
8893
8994 protected Boolean forceSource ;
9095
96+ protected BoundaryScannerType boundaryScannerType ;
97+
9198 protected Integer boundaryMaxScan ;
9299
93100 protected char [] boundaryChars ;
94101
102+ protected Locale boundaryScannerLocale ;
103+
95104 protected Integer noMatchSize ;
96105
97106 protected Integer phraseLimit ;
@@ -119,10 +128,18 @@ protected AbstractHighlighterBuilder(StreamInput in) throws IOException {
119128 order (in .readOptionalWriteable (Order ::readFromStream ));
120129 highlightFilter (in .readOptionalBoolean ());
121130 forceSource (in .readOptionalBoolean ());
131+ if (in .getVersion ().onOrAfter (Version .V_5_4_0_UNRELEASED )) {
132+ boundaryScannerType (in .readOptionalWriteable (BoundaryScannerType ::readFromStream ));
133+ }
122134 boundaryMaxScan (in .readOptionalVInt ());
123135 if (in .readBoolean ()) {
124136 boundaryChars (in .readString ().toCharArray ());
125137 }
138+ if (in .getVersion ().onOrAfter (Version .V_5_4_0_UNRELEASED )) {
139+ if (in .readBoolean ()) {
140+ boundaryScannerLocale (in .readString ());
141+ }
142+ }
126143 noMatchSize (in .readOptionalVInt ());
127144 phraseLimit (in .readOptionalVInt ());
128145 if (in .readBoolean ()) {
@@ -150,12 +167,22 @@ public final void writeTo(StreamOutput out) throws IOException {
150167 out .writeOptionalWriteable (order );
151168 out .writeOptionalBoolean (highlightFilter );
152169 out .writeOptionalBoolean (forceSource );
170+ if (out .getVersion ().onOrAfter (Version .V_5_4_0_UNRELEASED )) {
171+ out .writeOptionalWriteable (boundaryScannerType );
172+ }
153173 out .writeOptionalVInt (boundaryMaxScan );
154174 boolean hasBounaryChars = boundaryChars != null ;
155175 out .writeBoolean (hasBounaryChars );
156176 if (hasBounaryChars ) {
157177 out .writeString (String .valueOf (boundaryChars ));
158178 }
179+ if (out .getVersion ().onOrAfter (Version .V_5_4_0_UNRELEASED )) {
180+ boolean hasBoundaryScannerLocale = boundaryScannerLocale != null ;
181+ out .writeBoolean (hasBoundaryScannerLocale );
182+ if (hasBoundaryScannerLocale ) {
183+ out .writeString (boundaryScannerLocale .toLanguageTag ());
184+ }
185+ }
159186 out .writeOptionalVInt (noMatchSize );
160187 out .writeOptionalVInt (phraseLimit );
161188 boolean hasOptions = options != null ;
@@ -331,6 +358,33 @@ public Boolean highlightFilter() {
331358 return this .highlightFilter ;
332359 }
333360
361+ /**
362+ * When using the highlighterType <tt>fvh</tt> this setting
363+ * controls which scanner to use for fragment boundaries, and defaults to "simple".
364+ */
365+ @ SuppressWarnings ("unchecked" )
366+ public HB boundaryScannerType (String boundaryScannerType ) {
367+ this .boundaryScannerType = BoundaryScannerType .fromString (boundaryScannerType );
368+ return (HB ) this ;
369+ }
370+
371+ /**
372+ * When using the highlighterType <tt>fvh</tt> this setting
373+ * controls which scanner to use for fragment boundaries, and defaults to "simple".
374+ */
375+ @ SuppressWarnings ("unchecked" )
376+ public HB boundaryScannerType (BoundaryScannerType boundaryScannerType ) {
377+ this .boundaryScannerType = boundaryScannerType ;
378+ return (HB ) this ;
379+ }
380+
381+ /**
382+ * @return the value set by {@link #boundaryScannerType(String)}
383+ */
384+ public BoundaryScannerType boundaryScannerType () {
385+ return this .boundaryScannerType ;
386+ }
387+
334388 /**
335389 * When using the highlighterType <tt>fvh</tt> this setting
336390 * controls how far to look for boundary characters, and defaults to 20.
@@ -366,6 +420,25 @@ public char[] boundaryChars() {
366420 return this .boundaryChars ;
367421 }
368422
423+ /**
424+ * When using the highlighterType <tt>fvh</tt> and boundaryScannerType <tt>break_iterator</tt>, this setting
425+ * controls the locale to use by the BreakIterator, defaults to "root".
426+ */
427+ @ SuppressWarnings ("unchecked" )
428+ public HB boundaryScannerLocale (String boundaryScannerLocale ) {
429+ if (boundaryScannerLocale != null ) {
430+ this .boundaryScannerLocale = Locale .forLanguageTag (boundaryScannerLocale );
431+ }
432+ return (HB ) this ;
433+ }
434+
435+ /**
436+ * @return the value set by {@link #boundaryScannerLocale(String)}
437+ */
438+ public Locale boundaryScannerLocale () {
439+ return this .boundaryScannerLocale ;
440+ }
441+
369442 /**
370443 * Allows to set custom options for custom highlighters.
371444 */
@@ -491,12 +564,18 @@ void commonOptionsToXContent(XContentBuilder builder) throws IOException {
491564 if (highlightFilter != null ) {
492565 builder .field (HIGHLIGHT_FILTER_FIELD .getPreferredName (), highlightFilter );
493566 }
567+ if (boundaryScannerType != null ) {
568+ builder .field (BOUNDARY_SCANNER_FIELD .getPreferredName (), boundaryScannerType .name ());
569+ }
494570 if (boundaryMaxScan != null ) {
495571 builder .field (BOUNDARY_MAX_SCAN_FIELD .getPreferredName (), boundaryMaxScan );
496572 }
497573 if (boundaryChars != null ) {
498574 builder .field (BOUNDARY_CHARS_FIELD .getPreferredName (), new String (boundaryChars ));
499575 }
576+ if (boundaryScannerLocale != null ) {
577+ builder .field (BOUNDARY_SCANNER_LOCALE_FIELD .getPreferredName (), boundaryScannerLocale .toLanguageTag ());
578+ }
500579 if (options != null && options .size () > 0 ) {
501580 builder .field (OPTIONS_FIELD .getPreferredName (), options );
502581 }
@@ -523,8 +602,10 @@ static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<QueryParseContext,
523602 parser .declareInt (HB ::fragmentSize , FRAGMENT_SIZE_FIELD );
524603 parser .declareInt (HB ::numOfFragments , NUMBER_OF_FRAGMENTS_FIELD );
525604 parser .declareBoolean (HB ::requireFieldMatch , REQUIRE_FIELD_MATCH_FIELD );
605+ parser .declareString (HB ::boundaryScannerType , BOUNDARY_SCANNER_FIELD );
526606 parser .declareInt (HB ::boundaryMaxScan , BOUNDARY_MAX_SCAN_FIELD );
527607 parser .declareString ((HB hb , String bc ) -> hb .boundaryChars (bc .toCharArray ()) , BOUNDARY_CHARS_FIELD );
608+ parser .declareString (HB ::boundaryScannerLocale , BOUNDARY_SCANNER_LOCALE_FIELD );
528609 parser .declareString (HB ::highlighterType , TYPE_FIELD );
529610 parser .declareString (HB ::fragmenter , FRAGMENTER_FIELD );
530611 parser .declareInt (HB ::noMatchSize , NO_MATCH_SIZE_FIELD );
@@ -562,8 +643,8 @@ static <HB extends AbstractHighlighterBuilder<HB>> BiFunction<QueryParseContext,
562643 public final int hashCode () {
563644 return Objects .hash (getClass (), Arrays .hashCode (preTags ), Arrays .hashCode (postTags ), fragmentSize ,
564645 numOfFragments , highlighterType , fragmenter , highlightQuery , order , highlightFilter ,
565- forceSource , boundaryMaxScan , Arrays .hashCode (boundaryChars ), noMatchSize ,
566- phraseLimit , options , requireFieldMatch , doHashCode ());
646+ forceSource , boundaryScannerType , boundaryMaxScan , Arrays .hashCode (boundaryChars ), boundaryScannerLocale ,
647+ noMatchSize , phraseLimit , options , requireFieldMatch , doHashCode ());
567648 }
568649
569650 /**
@@ -591,8 +672,10 @@ public final boolean equals(Object obj) {
591672 Objects .equals (order , other .order ) &&
592673 Objects .equals (highlightFilter , other .highlightFilter ) &&
593674 Objects .equals (forceSource , other .forceSource ) &&
675+ Objects .equals (boundaryScannerType , other .boundaryScannerType ) &&
594676 Objects .equals (boundaryMaxScan , other .boundaryMaxScan ) &&
595677 Arrays .equals (boundaryChars , other .boundaryChars ) &&
678+ Objects .equals (boundaryScannerLocale , other .boundaryScannerLocale ) &&
596679 Objects .equals (noMatchSize , other .noMatchSize ) &&
597680 Objects .equals (phraseLimit , other .phraseLimit ) &&
598681 Objects .equals (options , other .options ) &&
0 commit comments