@@ -30,6 +30,12 @@ public interface INoriTokenizer : ITokenizer
3030 [ DataMember ( Name = "decompound_mode" ) ]
3131 NoriDecompoundMode ? DecompoundMode { get ; set ; }
3232
33+ /// <summary>
34+ /// Whether punctuation should be discarded from the output. Defaults to `true`.
/// The value is nullable and is mapped to the `discard_punctuation` data member.
/// NOTE(review): presumably the setting is left out of the request when null - confirm against the serializer.
35+ /// </summary>
36+ [ DataMember ( Name = "discard_punctuation" ) ]
37+ bool ? DiscardPunctuation { get ; set ; }
38+
3339 /// <summary>
3440 /// The Nori tokenizer uses the mecab-ko-dic dictionary by default. A user_dictionary with custom nouns (NNG) may be
3541 /// appended to
@@ -57,6 +63,9 @@ public class NoriTokenizer : TokenizerBase, INoriTokenizer
5763 /// <inheritdoc cref="INoriTokenizer.DecompoundMode" />
5864 public NoriDecompoundMode ? DecompoundMode { get ; set ; }
5965
66+ /// <inheritdoc cref="INoriTokenizer.DiscardPunctuation" />
67+ public bool ? DiscardPunctuation { get ; set ; }
68+
6069 /// <inheritdoc cref="INoriTokenizer.UserDictionary" />
6170 public string UserDictionary { get ; set ; }
6271
@@ -73,6 +82,7 @@ public class NoriTokenizerDescriptor
// Explicit INoriTokenizer implementations. The fluent methods visible on this descriptor
// (DecompoundMode, UserDictionaryRules, DiscardPunctuation) assign the same-named members through Assign.
7382 NoriDecompoundMode ? INoriTokenizer . DecompoundMode { get ; set ; }
7483 string INoriTokenizer . UserDictionary { get ; set ; }
7584 IEnumerable < string > INoriTokenizer . UserDictionaryRules { get ; set ; }
85+ bool ? INoriTokenizer . DiscardPunctuation { get ; set ; }
7686
7787 /// <inheritdoc cref="INoriTokenizer.DecompoundMode" />
/// <param name="mode">Value passed to Assign and stored in DecompoundMode; may be null.</param>
7888 public NoriTokenizerDescriptor DecompoundMode ( NoriDecompoundMode ? mode ) => Assign ( mode , ( a , v ) => a . DecompoundMode = v ) ;
@@ -85,5 +95,8 @@ public class NoriTokenizerDescriptor
8595
8696 /// <inheritdoc cref="INoriTokenizer.UserDictionaryRules" />
/// <param name="rules">Sequence of strings passed to Assign and stored in UserDictionaryRules.</param>
8797 public NoriTokenizerDescriptor UserDictionaryRules ( IEnumerable < string > rules ) => Assign ( rules , ( a , v ) => a . UserDictionaryRules = v ) ;
98+
99+ /// <inheritdoc cref="INoriTokenizer.DiscardPunctuation" />
/// <param name="discard">Value passed to Assign and stored in DiscardPunctuation; when the argument is omitted it is `true`.</param>
100+ public NoriTokenizerDescriptor DiscardPunctuation ( bool ? discard = true ) => Assign ( discard , ( a , v ) => a . DiscardPunctuation = v ) ;
88101 }
89102}
0 commit comments