2020
2121import org .apache .lucene .analysis .LowerCaseFilter ;
2222import org .apache .lucene .analysis .TokenStream ;
23- import org .apache .lucene .analysis .ar .ArabicNormalizationFilter ;
24- import org .apache .lucene .analysis .ar .ArabicStemFilter ;
25- import org .apache .lucene .analysis .br .BrazilianStemFilter ;
26- import org .apache .lucene .analysis .cjk .CJKBigramFilter ;
27- import org .apache .lucene .analysis .cjk .CJKWidthFilter ;
28- import org .apache .lucene .analysis .ckb .SoraniNormalizationFilter ;
29- import org .apache .lucene .analysis .core .DecimalDigitFilter ;
30- import org .apache .lucene .analysis .cz .CzechStemFilter ;
31- import org .apache .lucene .analysis .de .GermanNormalizationFilter ;
32- import org .apache .lucene .analysis .de .GermanStemFilter ;
33- import org .apache .lucene .analysis .fa .PersianNormalizationFilter ;
34- import org .apache .lucene .analysis .fr .FrenchAnalyzer ;
35- import org .apache .lucene .analysis .hi .HindiNormalizationFilter ;
36- import org .apache .lucene .analysis .in .IndicNormalizationFilter ;
37- import org .apache .lucene .analysis .miscellaneous .KeywordRepeatFilter ;
38- import org .apache .lucene .analysis .miscellaneous .LimitTokenCountFilter ;
39- import org .apache .lucene .analysis .miscellaneous .ScandinavianFoldingFilter ;
40- import org .apache .lucene .analysis .miscellaneous .ScandinavianNormalizationFilter ;
41- import org .apache .lucene .analysis .payloads .DelimitedPayloadTokenFilter ;
42- import org .apache .lucene .analysis .payloads .TypeAsPayloadTokenFilter ;
43- import org .apache .lucene .analysis .shingle .ShingleFilter ;
44- import org .apache .lucene .analysis .snowball .SnowballFilter ;
45- import org .apache .lucene .analysis .tr .ApostropheFilter ;
46- import org .apache .lucene .analysis .util .ElisionFilter ;
4723import org .elasticsearch .Version ;
48- import org .elasticsearch .index .analysis .DelimitedPayloadTokenFilterFactory ;
49- import org .elasticsearch .index .analysis .LimitTokenCountFilterFactory ;
5024import org .elasticsearch .index .analysis .MultiTermAwareComponent ;
5125import org .elasticsearch .index .analysis .TokenFilterFactory ;
5226import org .elasticsearch .indices .analysis .PreBuiltCacheFactory .CachingStrategy ;
53- import org .tartarus .snowball .ext .DutchStemmer ;
54- import org .tartarus .snowball .ext .FrenchStemmer ;
5527
5628import java .util .Locale ;
5729
@@ -66,229 +38,7 @@ public TokenStream create(TokenStream tokenStream, Version version) {
6638 protected boolean isMultiTermAware () {
6739 return true ;
6840 }
69- },
70-
71- // Extended Token Filters
72- ELISION (CachingStrategy .ONE ) {
73- @ Override
74- public TokenStream create (TokenStream tokenStream , Version version ) {
75- return new ElisionFilter (tokenStream , FrenchAnalyzer .DEFAULT_ARTICLES );
76- }
77- @ Override
78- protected boolean isMultiTermAware () {
79- return true ;
80- }
81- },
82-
83- ARABIC_STEM (CachingStrategy .ONE ) {
84- @ Override
85- public TokenStream create (TokenStream tokenStream , Version version ) {
86- return new ArabicStemFilter (tokenStream );
87- }
88- },
89-
90- BRAZILIAN_STEM (CachingStrategy .ONE ) {
91- @ Override
92- public TokenStream create (TokenStream tokenStream , Version version ) {
93- return new BrazilianStemFilter (tokenStream );
94- }
95- },
96-
97- CZECH_STEM (CachingStrategy .ONE ) {
98- @ Override
99- public TokenStream create (TokenStream tokenStream , Version version ) {
100- return new CzechStemFilter (tokenStream );
101- }
102- },
103-
104- DUTCH_STEM (CachingStrategy .ONE ) {
105- @ Override
106- public TokenStream create (TokenStream tokenStream , Version version ) {
107- return new SnowballFilter (tokenStream , new DutchStemmer ());
108- }
109- },
110-
111- FRENCH_STEM (CachingStrategy .ONE ) {
112- @ Override
113- public TokenStream create (TokenStream tokenStream , Version version ) {
114- return new SnowballFilter (tokenStream , new FrenchStemmer ());
115- }
116- },
117-
118- GERMAN_STEM (CachingStrategy .ONE ) {
119- @ Override
120- public TokenStream create (TokenStream tokenStream , Version version ) {
121- return new GermanStemFilter (tokenStream );
122- }
123- },
124-
125- RUSSIAN_STEM (CachingStrategy .ONE ) {
126- @ Override
127- public TokenStream create (TokenStream tokenStream , Version version ) {
128- return new SnowballFilter (tokenStream , "Russian" );
129- }
130- },
131-
132- KEYWORD_REPEAT (CachingStrategy .ONE ) {
133- @ Override
134- public TokenStream create (TokenStream tokenStream , Version version ) {
135- return new KeywordRepeatFilter (tokenStream );
136- }
137- },
138-
139- ARABIC_NORMALIZATION (CachingStrategy .ONE ) {
140- @ Override
141- public TokenStream create (TokenStream tokenStream , Version version ) {
142- return new ArabicNormalizationFilter (tokenStream );
143- }
144- @ Override
145- protected boolean isMultiTermAware () {
146- return true ;
147- }
148- },
149-
150- PERSIAN_NORMALIZATION (CachingStrategy .ONE ) {
151- @ Override
152- public TokenStream create (TokenStream tokenStream , Version version ) {
153- return new PersianNormalizationFilter (tokenStream );
154- }
155- @ Override
156- protected boolean isMultiTermAware () {
157- return true ;
158- }
159- },
160-
161- TYPE_AS_PAYLOAD (CachingStrategy .ONE ) {
162- @ Override
163- public TokenStream create (TokenStream tokenStream , Version version ) {
164- return new TypeAsPayloadTokenFilter (tokenStream );
165- }
166- },
167-
168- SHINGLE (CachingStrategy .ONE ) {
169- @ Override
170- public TokenStream create (TokenStream tokenStream , Version version ) {
171- return new ShingleFilter (tokenStream );
172- }
173- },
174-
175- GERMAN_NORMALIZATION (CachingStrategy .ONE ) {
176- @ Override
177- public TokenStream create (TokenStream tokenStream , Version version ) {
178- return new GermanNormalizationFilter (tokenStream );
179- }
180- @ Override
181- protected boolean isMultiTermAware () {
182- return true ;
183- }
184- },
185-
186- HINDI_NORMALIZATION (CachingStrategy .ONE ) {
187- @ Override
188- public TokenStream create (TokenStream tokenStream , Version version ) {
189- return new HindiNormalizationFilter (tokenStream );
190- }
191- @ Override
192- protected boolean isMultiTermAware () {
193- return true ;
194- }
195- },
196-
197- INDIC_NORMALIZATION (CachingStrategy .ONE ) {
198- @ Override
199- public TokenStream create (TokenStream tokenStream , Version version ) {
200- return new IndicNormalizationFilter (tokenStream );
201- }
202- @ Override
203- protected boolean isMultiTermAware () {
204- return true ;
205- }
206- },
207-
208- SORANI_NORMALIZATION (CachingStrategy .ONE ) {
209- @ Override
210- public TokenStream create (TokenStream tokenStream , Version version ) {
211- return new SoraniNormalizationFilter (tokenStream );
212- }
213- @ Override
214- protected boolean isMultiTermAware () {
215- return true ;
216- }
217- },
218-
219- SCANDINAVIAN_NORMALIZATION (CachingStrategy .ONE ) {
220- @ Override
221- public TokenStream create (TokenStream tokenStream , Version version ) {
222- return new ScandinavianNormalizationFilter (tokenStream );
223- }
224- @ Override
225- protected boolean isMultiTermAware () {
226- return true ;
227- }
228- },
229-
230- SCANDINAVIAN_FOLDING (CachingStrategy .ONE ) {
231- @ Override
232- public TokenStream create (TokenStream tokenStream , Version version ) {
233- return new ScandinavianFoldingFilter (tokenStream );
234- }
235- @ Override
236- protected boolean isMultiTermAware () {
237- return true ;
238- }
239- },
240-
241- APOSTROPHE (CachingStrategy .ONE ) {
242- @ Override
243- public TokenStream create (TokenStream tokenStream , Version version ) {
244- return new ApostropheFilter (tokenStream );
245- }
246- },
247-
248- CJK_WIDTH (CachingStrategy .ONE ) {
249- @ Override
250- public TokenStream create (TokenStream tokenStream , Version version ) {
251- return new CJKWidthFilter (tokenStream );
252- }
253- @ Override
254- protected boolean isMultiTermAware () {
255- return true ;
256- }
257- },
258-
259- DECIMAL_DIGIT (CachingStrategy .ONE ) {
260- @ Override
261- public TokenStream create (TokenStream tokenStream , Version version ) {
262- return new DecimalDigitFilter (tokenStream );
263- }
264- @ Override
265- protected boolean isMultiTermAware () {
266- return true ;
267- }
268- },
269-
270- CJK_BIGRAM (CachingStrategy .ONE ) {
271- @ Override
272- public TokenStream create (TokenStream tokenStream , Version version ) {
273- return new CJKBigramFilter (tokenStream );
274- }
275- },
276-
277- DELIMITED_PAYLOAD_FILTER (CachingStrategy .ONE ) {
278- @ Override
279- public TokenStream create (TokenStream tokenStream , Version version ) {
280- return new DelimitedPayloadTokenFilter (tokenStream , DelimitedPayloadTokenFilterFactory .DEFAULT_DELIMITER , DelimitedPayloadTokenFilterFactory .DEFAULT_ENCODER );
281- }
282- },
283-
284- LIMIT (CachingStrategy .ONE ) {
285- @ Override
286- public TokenStream create (TokenStream tokenStream , Version version ) {
287- return new LimitTokenCountFilter (tokenStream , LimitTokenCountFilterFactory .DEFAULT_MAX_TOKEN_COUNT , LimitTokenCountFilterFactory .DEFAULT_CONSUME_ALL_TOKENS );
288- }
289- },
290-
291- ;
41+ };
29242
29343 protected boolean isMultiTermAware () {
29444 return false ;
0 commit comments