101101import org .apache .lucene .analysis .tr .TurkishAnalyzer ;
102102import org .apache .lucene .analysis .util .ElisionFilter ;
103103import org .apache .lucene .util .SetOnce ;
104+ import org .elasticsearch .common .logging .DeprecationCategory ;
105+ import org .elasticsearch .common .logging .DeprecationLogger ;
104106import org .elasticsearch .common .regex .Regex ;
107+ import org .elasticsearch .common .settings .Settings ;
108+ import org .elasticsearch .env .Environment ;
109+ import org .elasticsearch .index .IndexSettings ;
105110import org .elasticsearch .index .IndexVersions ;
106111import org .elasticsearch .index .analysis .AnalyzerProvider ;
107112import org .elasticsearch .index .analysis .CharFilterFactory ;
134139
135140public class CommonAnalysisPlugin extends Plugin implements AnalysisPlugin , ScriptPlugin {
136141
142+ private static final DeprecationLogger deprecationLogger = DeprecationLogger .getLogger (CommonAnalysisPlugin .class );
143+
137144 private final SetOnce <ScriptService > scriptServiceHolder = new SetOnce <>();
138145 private final SetOnce <SynonymsManagementAPIService > synonymsManagementServiceHolder = new SetOnce <>();
139146
@@ -224,6 +231,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
224231 filters .put ("dictionary_decompounder" , requiresAnalysisSettings (DictionaryCompoundWordTokenFilterFactory ::new ));
225232 filters .put ("dutch_stem" , DutchStemTokenFilterFactory ::new );
226233 filters .put ("edge_ngram" , EdgeNGramTokenFilterFactory ::new );
// Legacy camel-case alias of the "edge_ngram" token filter. The factory is always built, but the
// name check is deferred until a token stream is actually created from it.
filters.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) ->
    new EdgeNGramTokenFilterFactory(indexSettings, environment, name, settings) {
        /**
         * Wraps the given stream with the edge n-gram filter.
         * Warns for indices created before 8.0.0 and rejects the legacy name for newer indices.
         *
         * @throws IllegalArgumentException if the index was created on or after 8.0.0
         */
        @Override
        public TokenStream create(TokenStream tokenStream) {
            final boolean createdOn8OrLater = indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0);
            if (createdOn8OrLater == false) {
                // Older indices may keep using the alias, but get a deprecation warning.
                deprecationLogger.warn(
                    DeprecationCategory.ANALYSIS,
                    "edgeNGram_deprecation",
                    "The [edgeNGram] token filter name is deprecated and will be removed in a future version. "
                        + "Please change the filter name to [edge_ngram] instead."
                );
                return super.create(tokenStream);
            }
            throw new IllegalArgumentException(
                "The [edgeNGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
                    + "Please change the filter name to [edge_ngram] instead."
            );
        }
    }
);
227256 filters .put ("elision" , requiresAnalysisSettings (ElisionTokenFilterFactory ::new ));
228257 filters .put ("fingerprint" , FingerprintTokenFilterFactory ::new );
229258 filters .put ("flatten_graph" , FlattenGraphTokenFilterFactory ::new );
@@ -243,6 +272,28 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
243272 filters .put ("min_hash" , MinHashTokenFilterFactory ::new );
244273 filters .put ("multiplexer" , MultiplexerTokenFilterFactory ::new );
245274 filters .put ("ngram" , NGramTokenFilterFactory ::new );
// Legacy camel-case alias of the "ngram" token filter. The factory is always built, but the
// name check is deferred until a token stream is actually created from it.
filters.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) ->
    new NGramTokenFilterFactory(indexSettings, environment, name, settings) {
        /**
         * Wraps the given stream with the n-gram filter.
         * Warns for indices created before 8.0.0 and rejects the legacy name for newer indices.
         *
         * @throws IllegalArgumentException if the index was created on or after 8.0.0
         */
        @Override
        public TokenStream create(TokenStream tokenStream) {
            final boolean createdOn8OrLater = indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0);
            if (createdOn8OrLater == false) {
                // Older indices may keep using the alias, but get a deprecation warning.
                deprecationLogger.warn(
                    DeprecationCategory.ANALYSIS,
                    "nGram_deprecation",
                    "The [nGram] token filter name is deprecated and will be removed in a future version. "
                        + "Please change the filter name to [ngram] instead."
                );
                return super.create(tokenStream);
            }
            throw new IllegalArgumentException(
                "The [nGram] token filter name was deprecated in 6.4 and cannot be used in new indices. "
                    + "Please change the filter name to [ngram] instead."
            );
        }
    }
);
246297 filters .put ("pattern_capture" , requiresAnalysisSettings (PatternCaptureGroupTokenFilterFactory ::new ));
247298 filters .put ("pattern_replace" , requiresAnalysisSettings (PatternReplaceTokenFilterFactory ::new ));
248299 filters .put ("persian_normalization" , PersianNormalizationFilterFactory ::new );
@@ -294,7 +345,39 @@ public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
294345 tokenizers .put ("simple_pattern" , SimplePatternTokenizerFactory ::new );
295346 tokenizers .put ("simple_pattern_split" , SimplePatternSplitTokenizerFactory ::new );
296347 tokenizers .put ("thai" , ThaiTokenizerFactory ::new );
// Legacy camel-case alias of the "ngram" tokenizer. The check runs when the factory is requested:
// rejected for indices created on or after 8.0.0, warned about for indices created on or after
// 7.6.0, and accepted silently for anything older.
tokenizers.put("nGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
        throw new IllegalArgumentException(
            "The [nGram] tokenizer name was deprecated in 7.6. "
                + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
        );
    }
    // Only reached for indices created before 8.0.0.
    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
        deprecationLogger.warn(
            DeprecationCategory.ANALYSIS,
            "nGram_tokenizer_deprecation",
            "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
                + "Please change the tokenizer name to [ngram] instead."
        );
    }
    return new NGramTokenizerFactory(indexSettings, environment, name, settings);
});
297364 tokenizers .put ("ngram" , NGramTokenizerFactory ::new );
// Legacy camel-case alias of the "edge_ngram" tokenizer. The check runs when the factory is
// requested: rejected for indices created on or after 8.0.0, warned about for indices created on
// or after 7.6.0, and accepted silently for anything older.
tokenizers.put("edgeNGram", (IndexSettings indexSettings, Environment environment, String name, Settings settings) -> {
    if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_8_0_0)) {
        // The message must point at the registered replacement name, "edge_ngram";
        // it previously suggested the non-existent "edge_nGram".
        throw new IllegalArgumentException(
            "The [edgeNGram] tokenizer name was deprecated in 7.6. "
                + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
        );
    } else if (indexSettings.getIndexVersionCreated().onOrAfter(IndexVersions.V_7_6_0)) {
        deprecationLogger.warn(
            DeprecationCategory.ANALYSIS,
            "edgeNGram_tokenizer_deprecation",
            "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
                + "Please change the tokenizer name to [edge_ngram] instead."
        );
    }
    return new EdgeNGramTokenizerFactory(indexSettings, environment, name, settings);
});
298381 tokenizers .put ("edge_ngram" , EdgeNGramTokenizerFactory ::new );
299382 tokenizers .put ("char_group" , CharGroupTokenizerFactory ::new );
300383 tokenizers .put ("classic" , ClassicTokenizerFactory ::new );
@@ -505,17 +588,54 @@ public List<PreConfiguredTokenizer> getPreConfiguredTokenizers() {
505588 tokenizers .add (PreConfiguredTokenizer .singleton ("letter" , LetterTokenizer ::new ));
506589 tokenizers .add (PreConfiguredTokenizer .singleton ("whitespace" , WhitespaceTokenizer ::new ));
507590 tokenizers .add (PreConfiguredTokenizer .singleton ("ngram" , NGramTokenizer ::new ));
508- tokenizers .add (
509- PreConfiguredTokenizer . indexVersion (
510- "edge_ngram" ,
511- ( version ) -> new EdgeNGramTokenizer ( NGramTokenizer . DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer . DEFAULT_MAX_NGRAM_SIZE )
512- )
513- );
591+ tokenizers .add (PreConfiguredTokenizer . indexVersion ( "edge_ngram" , ( version ) -> {
592+ if ( version . onOrAfter ( IndexVersions . V_7_3_0 )) {
593+ return new EdgeNGramTokenizer ( NGramTokenizer . DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer . DEFAULT_MAX_NGRAM_SIZE );
594+ }
595+ return new EdgeNGramTokenizer ( EdgeNGramTokenizer . DEFAULT_MIN_GRAM_SIZE , EdgeNGramTokenizer . DEFAULT_MAX_GRAM_SIZE );
596+ }) );
514597 tokenizers .add (PreConfiguredTokenizer .singleton ("pattern" , () -> new PatternTokenizer (Regex .compile ("\\ W+" , null ), -1 )));
515598 tokenizers .add (PreConfiguredTokenizer .singleton ("thai" , ThaiTokenizer ::new ));
516599 // TODO deprecate and remove in API
517600 // This is already broken with normalization, so backwards compat isn't necessary?
518601 tokenizers .add (PreConfiguredTokenizer .singleton ("lowercase" , XLowerCaseTokenizer ::new ));
602+
603+ // Temporary shim for aliases. TODO deprecate after they are moved
// Pre-configured variant of the legacy "nGram" name: rejected for index versions on or after
// 8.0.0, warned about for versions on or after 7.6.0, accepted silently for anything older.
tokenizers.add(PreConfiguredTokenizer.indexVersion("nGram", (version) -> {
    if (version.onOrAfter(IndexVersions.V_8_0_0)) {
        throw new IllegalArgumentException(
            "The [nGram] tokenizer name was deprecated in 7.6. "
                + "Please use the tokenizer name to [ngram] for indices created in versions 8 or higher instead."
        );
    }
    // Only reached for index versions before 8.0.0.
    if (version.onOrAfter(IndexVersions.V_7_6_0)) {
        deprecationLogger.warn(
            DeprecationCategory.ANALYSIS,
            "nGram_tokenizer_deprecation",
            "The [nGram] tokenizer name is deprecated and will be removed in a future version. "
                + "Please change the tokenizer name to [ngram] instead."
        );
    }
    return new NGramTokenizer();
}));
620+ tokenizers .add (PreConfiguredTokenizer .indexVersion ("edgeNGram" , (version ) -> {
621+ if (version .onOrAfter (IndexVersions .V_8_0_0 )) {
622+ throw new IllegalArgumentException (
623+ "The [edgeNGram] tokenizer name was deprecated in 7.6. "
624+ + "Please use the tokenizer name to [edge_ngram] for indices created in versions 8 or higher instead."
625+ );
626+ } else if (version .onOrAfter (IndexVersions .V_7_6_0 )) {
627+ deprecationLogger .warn (
628+ DeprecationCategory .ANALYSIS ,
629+ "edgeNGram_tokenizer_deprecation" ,
630+ "The [edgeNGram] tokenizer name is deprecated and will be removed in a future version. "
631+ + "Please change the tokenizer name to [edge_ngram] instead."
632+ );
633+ }
634+ if (version .onOrAfter (IndexVersions .V_7_3_0 )) {
635+ return new EdgeNGramTokenizer (NGramTokenizer .DEFAULT_MIN_NGRAM_SIZE , NGramTokenizer .DEFAULT_MAX_NGRAM_SIZE );
636+ }
637+ return new EdgeNGramTokenizer (EdgeNGramTokenizer .DEFAULT_MIN_GRAM_SIZE , EdgeNGramTokenizer .DEFAULT_MAX_GRAM_SIZE );
638+ }));
519639 tokenizers .add (PreConfiguredTokenizer .singleton ("PathHierarchy" , PathHierarchyTokenizer ::new ));
520640
521641 return tokenizers ;
0 commit comments