Skip to content

Commit 54175c8

Browse files
authored
add boundary scanner and max fragment length support to unified highl… (#2746)
* add boundary scanner and max fragment length support to unified highlighter as per #elastic/elasticsearch/23431 * add boundary_max_scan * add support for fragmenter on highlighters * added test for fragmenter and also copied the new properties down to IHighlightField as well
1 parent 288dcaa commit 54175c8

File tree

7 files changed

+303
-222
lines changed

7 files changed

+303
-222
lines changed

src/CodeGeneration/ApiGenerator/ApiGenerator.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<VersionSuffix>alpha</VersionSuffix>
77
</PropertyGroup>
88
<ItemGroup>
9-
<!--<ProjectReference Include="..\..\Nest\Nest.csproj" />-->
9+
<ProjectReference Include="..\..\Nest\Nest.csproj" />
1010
<PackageReference Include="Newtonsoft.Json" Version="10.0.1" />
1111
<PackageReference Include="RazorMachine" Version="2.6.1" />
1212
<!-- TODO the following packages prevent us to jump to netcoreapp1.0 -->

src/Nest/Nest.csproj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,4 @@
1919
<PackageReference Include="System.Linq.Queryable" Version="4.0.1" />
2020
</ItemGroup>
2121
<!--<Import Project="..\..\.paket\Paket.Restore.targets" />-->
22-
</Project>
22+
</Project>
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
using System.Runtime.Serialization;
2+
using Newtonsoft.Json;
3+
using Newtonsoft.Json.Converters;
4+
5+
namespace Nest
6+
{
7+
[JsonConverter(typeof(StringEnumConverter))]
8+
public enum BoundaryScanner
9+
{
10+
/// <summary>
11+
/// (default mode for the FVH): allows configuring which characters (boundary_chars) constitute a boundary for highlighting. It’s a single
12+
/// string with each boundary character defined in it (defaults to .,!? \t\n). It also allows configuring the boundary_max_scan to
13+
/// control how far to look for boundary characters (defaults to 20). Works only with the Fast Vector Highlighter.
14+
/// </summary>
15+
[EnumMember(Value = "chars")]
16+
Characters,
17+
/// <summary>
18+
/// sentence: use Java’s BreakIterator to break the highlighted fragments at the next sentence boundary.
19+
/// You can further specify boundary_scanner_locale to control which Locale is used to search the text for these boundaries.
20+
/// </summary>
21+
[EnumMember(Value = "sentence")]
22+
Sentence,
23+
/// <summary>
24+
/// word: use Java’s BreakIterator to break the highlighted fragments at the next word boundary.
25+
/// You can further specify boundary_scanner_locale to control which Locale is used to search the text for these boundaries.
26+
/// </summary>
27+
[EnumMember(Value = "word")]
28+
Word
29+
}
30+
}

src/Nest/Search/Search/Highlighting/Highlight.cs

Lines changed: 157 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,62 +5,180 @@
55

66
namespace Nest
77
{
8+
//TODO 6.0 completely revisit how we mapped highlighters
9+
//this is used in tophits/percolator AND in the search highlighter as the root
10+
//Not all of these properties may make sense or be valid there
811
[JsonObject(MemberSerialization = MemberSerialization.OptIn)]
912
[JsonConverter(typeof(ReadAsTypeJsonConverter<Highlight>))]
1013
public interface IHighlight
1114
{
15+
/// <summary>
16+
/// Controls the pre tag in which to wrap highlights.
17+
/// By default, the highlighting will wrap highlighted text in &lt;em&gt; and &lt;/em&gt;.
18+
/// Using the fast vector highlighter, there can be more tags, and the importance is ordered.
19+
/// </summary>
1220
[JsonProperty("pre_tags")]
1321
IEnumerable<string> PreTags { get; set; }
1422

23+
/// <summary>
24+
/// Controls the post tag in which to wrap highlights.
25+
/// By default, the highlighting will wrap highlighted text in &lt;em&gt; and &lt;/em&gt;.
26+
/// Using the fast vector highlighter, there can be more tags, and the importance is ordered.
27+
/// </summary>
1528
[JsonProperty("post_tags")]
1629
IEnumerable<string> PostTags { get; set; }
1730

31+
/// <summary>
32+
/// The size of the highlighted fragment, in characters. Defaults to 100
33+
/// </summary>
1834
[JsonProperty("fragment_size")]
1935
int? FragmentSize { get; set; }
2036

21-
[JsonProperty("tags_schema")]
22-
string TagsSchema { get; set; }
37+
/// <summary>
38+
/// In the case where there is no matching fragment to highlight, the default is to not return anything. Instead, we can return a snippet of text from
39+
/// the beginning of the field by setting no_match_size (default 0) to the length of the text that you want returned. The actual length may be
40+
/// shorter or longer than specified as it tries to break on a word boundary. When using the postings highlighter it is not possible to control the
41+
/// actual size of the snippet, therefore the first sentence gets returned whenever no_match_size is greater than 0.
42+
/// </summary>
43+
[JsonProperty("no_match_size")]
44+
int? NoMatchSize { get; set; }
2345

46+
/// <summary>
47+
/// The maximum number of fragments to return. Defaults to 5.
48+
/// </summary>
2449
[JsonProperty("number_of_fragments")]
2550
int? NumberOfFragments { get; set; }
2651

52+
/// <summary>
53+
/// Controls the margin to start highlighting from when using the fast vector highlighter
54+
/// </summary>
2755
[JsonProperty("fragment_offset")]
2856
int? FragmentOffset { get; set; }
2957

58+
[Obsolete("Bad mapping use BoundaryMaxScan instead")]
3059
[JsonProperty("boundary_max_size")]
3160
int? BoundaryMaxSize { get; set; }
3261

62+
/// <summary>
63+
/// Controls how far to look for boundary characters. Defaults to 20.
64+
/// </summary>
65+
[JsonProperty("boundary_max_scan")]
66+
int? BoundaryMaxScan { get; set; }
67+
68+
/// <summary>
69+
/// Define how highlighted text will be encoded.
70+
/// It can be either default (no encoding) or html (will escape html, if you use html highlighting tags).
71+
/// </summary>
3372
[JsonProperty("encoder")]
3473
string Encoder { get; set; }
3574

75+
/// <summary>
76+
/// The order in which highlighted fragments are sorted
77+
/// </summary>
3678
[JsonProperty("order")]
3779
string Order { get; set; }
3880

81+
/// <summary>
82+
/// Use a specific "tag" schema.
83+
/// </summary>
84+
/// <remarks>
85+
/// Currently a single schema called "styled" with the following pre_tags:
86+
/// &lt;em class="hlt1"&gt;, &lt;em class="hlt2"&gt;, &lt;em class="hlt3"&gt;,
87+
/// &lt;em class="hlt4"&gt;, &lt;em class="hlt5"&gt;, &lt;em class="hlt6"&gt;,
88+
/// &lt;em class="hlt7"&gt;, &lt;em class="hlt8"&gt;, &lt;em class="hlt9"&gt;,
89+
/// &lt;em class="hlt10"&gt;
90+
/// </remarks>
91+
[JsonProperty("tags_schema")]
92+
string TagsSchema { get; set; }
93+
3994
[JsonProperty(PropertyName = "fields")]
4095
[JsonConverter(typeof(VerbatimDictionaryKeysJsonConverter<Field, IHighlightField>))]
4196
Dictionary<Field, IHighlightField> Fields { get; set; }
4297

98+
/// <summary>
99+
/// Controls whether only fields that contain a query match are highlighted.
100+
/// </summary>
101+
/// <remarks>
102+
/// Defaults to true. Set to false to highlight all fields.
107+
/// </remarks>
43108
[JsonProperty("require_field_match")]
44109
bool? RequireFieldMatch { get; set; }
45110

111+
/// <summary>
112+
/// Defines what constitutes a boundary for highlighting when using the fast vector highlighter.
113+
/// It's a single string with each boundary character defined in it. It defaults to .,!? \t\n.
114+
/// </summary>
46115
[JsonProperty("boundary_chars")]
47116
string BoundaryChars { get; set; }
117+
118+
[JsonProperty("max_fragment_length")]
119+
int? MaxFragmentLength { get; set; }
120+
121+
/// <summary>
122+
/// When highlighting a field using the unified highlighter or the fast vector highlighter, you can specify how to break the highlighted
123+
/// fragments using boundary_scanner
124+
/// </summary>
125+
[JsonProperty("boundary_scanner")]
126+
BoundaryScanner? BoundaryScanner { get; set; }
127+
128+
/// <summary>
129+
/// You can further specify boundary_scanner_locale to control which Locale is used to search the text for these boundaries.
130+
/// </summary>
131+
[JsonProperty("boundary_scanner_locale")]
132+
string BoundaryScannerLocale { get; set; }
133+
134+
/// <summary>
135+
/// Fragmenter can control how text should be broken up in highlight snippets. However, this option is
136+
/// applicable only for the Plain Highlighter
137+
/// </summary>
138+
[JsonProperty("fragmenter")]
139+
HighlighterFragmenter? Fragmenter { get; set; }
48140
}
49141

50142
public class Highlight : IHighlight
51143
{
144+
/// <inheritdoc/>
52145
public IEnumerable<string> PreTags { get; set; }
146+
/// <inheritdoc/>
53147
public IEnumerable<string> PostTags { get; set; }
148+
/// <inheritdoc/>
54149
public int? FragmentSize { get; set; }
150+
/// <inheritdoc/>
55151
public string TagsSchema { get; set; }
152+
/// <inheritdoc/>
56153
public int? NumberOfFragments { get; set; }
154+
/// <inheritdoc/>
57155
public int? FragmentOffset { get; set; }
156+
/// <inheritdoc/>
157+
[Obsolete("Bad mapping use BoundaryMaxScan instead")]
158+
// <inheritdoc/>
58159
public int? BoundaryMaxSize { get; set; }
160+
/// <inheritdoc/>
161+
public int? BoundaryMaxScan { get; set; }
162+
/// <inheritdoc/>
59163
public string Encoder { get; set; }
164+
/// <inheritdoc/>
60165
public string Order { get; set; }
166+
/// <inheritdoc/>
61167
public Dictionary<Field, IHighlightField> Fields { get; set; }
168+
/// <inheritdoc/>
62169
public bool? RequireFieldMatch { get; set; }
170+
/// <inheritdoc/>
63171
public string BoundaryChars { get; set; }
172+
/// <inheritdoc/>
173+
public int? MaxFragmentLength { get; set; }
174+
/// <inheritdoc/>
175+
public int? NoMatchSize { get; set; }
176+
/// <inheritdoc/>
177+
public BoundaryScanner? BoundaryScanner { get; set; }
178+
/// <inheritdoc/>
179+
public string BoundaryScannerLocale { get; set; }
180+
/// <inheritdoc/>
181+
public HighlighterFragmenter? Fragmenter { get; set; }
64182
}
65183

66184
public class HighlightDescriptor<T> : DescriptorBase<HighlightDescriptor<T> ,IHighlight>, IHighlight
@@ -74,12 +192,19 @@ public class HighlightDescriptor<T> : DescriptorBase<HighlightDescriptor<T> ,IHi
74192
int? IHighlight.NumberOfFragments { get; set; }
75193
int? IHighlight.FragmentOffset { get; set; }
76194
int? IHighlight.BoundaryMaxSize { get; set; }
195+
int? IHighlight.BoundaryMaxScan { get; set; }
77196
string IHighlight.Encoder { get; set; }
78197
string IHighlight.Order { get; set; }
79198
Dictionary<Field, IHighlightField> IHighlight.Fields { get; set; }
80199
bool? IHighlight.RequireFieldMatch { get; set; }
81200
string IHighlight.BoundaryChars { get; set; }
201+
int? IHighlight.MaxFragmentLength { get; set; }
202+
int? IHighlight.NoMatchSize { get; set; }
203+
BoundaryScanner? IHighlight.BoundaryScanner { get; set; }
204+
string IHighlight.BoundaryScannerLocale { get; set; }
205+
HighlighterFragmenter? IHighlight.Fragmenter { get; set; }
82206

207+
// <inheritdoc/>
83208
public HighlightDescriptor<T> Fields(params Func<HighlightFieldDescriptor<T>, IHighlightField>[] fieldHighlighters) =>
84209
Assign(a => a.Fields = fieldHighlighters?
85210
.Select(f =>
@@ -90,30 +215,60 @@ public HighlightDescriptor<T> Fields(params Func<HighlightFieldDescriptor<T>, IH
90215
.NullIfNoKeys()
91216
);
92217

218+
// <inheritdoc/>
93219
public HighlightDescriptor<T> TagsSchema(string schema = "styled") => Assign(a => a.TagsSchema = schema);
94220

221+
// <inheritdoc/>
95222
public HighlightDescriptor<T> PreTags(string preTags) => this.PreTags(new[] {preTags});
96223

224+
// <inheritdoc/>
97225
public HighlightDescriptor<T> PostTags(string postTags)=> this.PostTags(new[] {postTags});
98226

227+
// <inheritdoc/>
99228
public HighlightDescriptor<T> PreTags(IEnumerable<string> preTags) => Assign(a => a.PreTags = preTags.ToListOrNullIfEmpty());
100229

230+
// <inheritdoc/>
101231
public HighlightDescriptor<T> PostTags(IEnumerable<string> postTags) => Assign(a => a.PostTags = postTags.ToListOrNullIfEmpty());
102232

233+
// <inheritdoc/>
103234
public HighlightDescriptor<T> FragmentSize(int fragmentSize) => Assign(a => a.FragmentSize = fragmentSize);
104235

236+
// <inheritdoc/>
105237
public HighlightDescriptor<T> NumberOfFragments(int numberOfFragments) => Assign(a => a.NumberOfFragments = numberOfFragments);
106238

239+
// <inheritdoc/>
107240
public HighlightDescriptor<T> FragmentOffset(int fragmentOffset) => Assign(a => a.FragmentOffset = fragmentOffset);
108241

242+
// <inheritdoc/>
109243
public HighlightDescriptor<T> Encoder(string encoder) => Assign(a => a.Encoder = encoder);
110244

245+
// <inheritdoc/>
111246
public HighlightDescriptor<T> Order(string order) => Assign(a => a.Order = order);
112247

248+
// <inheritdoc/>
113249
public HighlightDescriptor<T> RequireFieldMatch(bool requireFieldMatch) => Assign(a => a.RequireFieldMatch = requireFieldMatch);
114250

251+
// <inheritdoc/>
115252
public HighlightDescriptor<T> BoundaryCharacters(string boundaryCharacters) => Assign(a => a.BoundaryChars = boundaryCharacters);
116253

254+
[Obsolete("Bad mapping use BoundaryMaxScan instead")]
117255
public HighlightDescriptor<T> BoundaryMaxSize(int boundaryMaxSize) => Assign(a => a.BoundaryMaxSize = boundaryMaxSize);
256+
// <inheritdoc/>
257+
public HighlightDescriptor<T> BoundaryMaxScan(int boundaryMaxScan) => Assign(a => a.BoundaryMaxScan = boundaryMaxScan);
258+
259+
// <inheritdoc/>
260+
public HighlightDescriptor<T> MaxFragmentLength(int? maxFragmentLength) => Assign(a => a.MaxFragmentLength = maxFragmentLength);
261+
262+
// <inheritdoc/>
263+
public HighlightDescriptor<T> NoMatchSize(int? noMatchSize) => Assign(a => a.NoMatchSize = noMatchSize);
264+
265+
// <inheritdoc/>
266+
public HighlightDescriptor<T> BoundaryScanner(BoundaryScanner? boundaryScanner) => Assign(a => a.BoundaryScanner = boundaryScanner);
267+
268+
// <inheritdoc/>
269+
public HighlightDescriptor<T> BoundaryScannerLocale(string locale) => Assign(a => a.BoundaryScannerLocale = locale);
270+
271+
// <inheritdoc/>
272+
public HighlightDescriptor<T> Fragmenter(HighlighterFragmenter? fragmenter) => Assign(a => a.Fragmenter = fragmenter);
118273
}
119274
}

0 commit comments

Comments
 (0)