@@ -21,6 +21,16 @@ public interface IKuromojiTokenizer : ITokenizer
2121 [ JsonFormatter ( typeof ( NullableStringBooleanFormatter ) ) ]
2222 bool ? DiscardPunctuation { get ; set ; }
2323
24+ /// <summary>
25+ /// Whether the original compound tokens should be discarded from the output when
26+ /// <see cref="Mode"/> is <see cref="KuromojiTokenizationMode.Search"/>. Defaults to <c>false</c>.
27+ /// <para />
28+ /// Valid in Elasticsearch 7.9.0+
29+ /// </summary>
30+ [ DataMember ( Name = "discard_compound_token" ) ]
31+ [ JsonFormatter ( typeof ( NullableStringBooleanFormatter ) ) ]
32+ bool ? DiscardCompoundToken { get ; set ; }
33+
2434 /// <summary>
2535 /// The tokenization mode determines how the tokenizer handles compound and unknown words.
2636 /// </summary>
@@ -64,6 +74,9 @@ public class KuromojiTokenizer : TokenizerBase, IKuromojiTokenizer
6474 /// <inheritdoc />
6575 public bool ? DiscardPunctuation { get ; set ; }
6676
77+ /// <inheritdoc />
78+ public bool ? DiscardCompoundToken { get ; set ; }
79+
6780 /// <inheritdoc />
6881 public KuromojiTokenizationMode ? Mode { get ; set ; }
6982
@@ -86,32 +99,35 @@ public class KuromojiTokenizerDescriptor
8699 {
87100 protected override string Type => "kuromoji_tokenizer" ;
88101 bool ? IKuromojiTokenizer . DiscardPunctuation { get ; set ; }
89-
102+ bool ? IKuromojiTokenizer . DiscardCompoundToken { get ; set ; }
90103 KuromojiTokenizationMode ? IKuromojiTokenizer . Mode { get ; set ; }
91104 int ? IKuromojiTokenizer . NBestCost { get ; set ; }
92105 string IKuromojiTokenizer . NBestExamples { get ; set ; }
93106 string IKuromojiTokenizer . UserDictionary { get ; set ; }
94107 IEnumerable < string > IKuromojiTokenizer . UserDictionaryRules { get ; set ; }
95108
96- /// <inheritdoc />
109+ /// <inheritdoc cref="IKuromojiTokenizer.Mode" />
97110 public KuromojiTokenizerDescriptor Mode(KuromojiTokenizationMode? mode) => Assign(mode, (tokenizer, value) => tokenizer.Mode = value);
98111
99- /// <inheritdoc />
112+ /// <inheritdoc cref="IKuromojiTokenizer.DiscardPunctuation" />
100113 public KuromojiTokenizerDescriptor DiscardPunctuation(bool? discard = true) => Assign(discard, (tokenizer, value) => tokenizer.DiscardPunctuation = value);
101114
102- /// <inheritdoc />
115+ /// <inheritdoc cref="IKuromojiTokenizer.DiscardCompoundToken" />
116+ public KuromojiTokenizerDescriptor DiscardCompoundToken(bool? discard = true) => Assign(discard, (tokenizer, value) => tokenizer.DiscardCompoundToken = value);
117+
118+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionary" />
103119 public KuromojiTokenizerDescriptor UserDictionary(string userDictionary) => Assign(userDictionary, (tokenizer, value) => tokenizer.UserDictionary = value);
104120
105- /// <inheritdoc />
121+ /// <inheritdoc cref="IKuromojiTokenizer.NBestExamples" />
106122 public KuromojiTokenizerDescriptor NBestExamples(string examples) => Assign(examples, (tokenizer, value) => tokenizer.NBestExamples = value);
107123
108- /// <inheritdoc />
124+ /// <inheritdoc cref="IKuromojiTokenizer.NBestCost" />
109125 public KuromojiTokenizerDescriptor NBestCost(int? cost) => Assign(cost, (tokenizer, value) => tokenizer.NBestCost = value);
110126
111- /// <inheritdoc />
127+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionaryRules" />
112128 public KuromojiTokenizerDescriptor UserDictionaryRules ( IEnumerable < string > rules ) => Assign ( rules , ( a , v ) => a . UserDictionaryRules = v ) ; // assign the lambda's own value parameter, like the sibling setters, rather than capturing `rules` in a closure
113129
114- /// <inheritdoc />
130+ /// <inheritdoc cref="IKuromojiTokenizer.UserDictionaryRules" />
115131 public KuromojiTokenizerDescriptor UserDictionaryRules ( params string [ ] rules ) => Assign ( rules , ( a , v ) => a . UserDictionaryRules = v ) ; // assign the lambda's own value parameter, like the sibling setters, rather than capturing `rules` in a closure
116132 }
117133}
0 commit comments