Skip to content

Commit 6db708e

Browse files
committed
Move more token filters to analysis-common module
The following token filters were moved: common grams, limit token count, pattern capture, and pattern replace. Relates to #23658
1 parent d71fece commit 6db708e

File tree

19 files changed

+180
-66
lines changed

19 files changed

+180
-66
lines changed

buildSrc/src/main/resources/checkstyle_suppressions.xml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,6 @@
266266
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicyConfig.java" checks="LineLength" />
267267
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLog.java" checks="LineLength" />
268268
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]AnalysisRegistry.java" checks="LineLength" />
269-
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CommonGramsTokenFilterFactory.java" checks="LineLength" />
270269
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]CustomAnalyzerProvider.java" checks="LineLength" />
271270
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]ShingleTokenFilterFactory.java" checks="LineLength" />
272271
<suppress files="core[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]StemmerOverrideTokenFilterFactory.java" checks="LineLength" />
@@ -564,9 +563,7 @@
564563
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]IndexingSlowLogTests.java" checks="LineLength" />
565564
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]MergePolicySettingsTests.java" checks="LineLength" />
566565
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]SearchSlowLogTests.java" checks="LineLength" />
567-
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PatternCaptureTokenFilterTests.java" checks="LineLength" />
568566
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PreBuiltAnalyzerTests.java" checks="LineLength" />
569-
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]commongrams[/\\]CommonGramsTokenFilterFactoryTests.java" checks="LineLength" />
570567
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineMergeIT.java" checks="LineLength" />
571568
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]engine[/\\]InternalEngineTests.java" checks="LineLength" />
572569
<suppress files="core[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]fielddata[/\\]AbstractFieldDataTestCase.java" checks="LineLength" />

core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,6 @@
4646
import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
4747
import org.elasticsearch.index.analysis.ClassicFilterFactory;
4848
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
49-
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
5049
import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
5150
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
5251
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
@@ -80,16 +79,13 @@
8079
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
8180
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
8281
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
83-
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
8482
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
8583
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
8684
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
8785
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
8886
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
8987
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
9088
import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
91-
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
92-
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
9389
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
9490
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
9591
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
@@ -196,13 +192,9 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(Li
196192
tokenFilters.register("standard", StandardTokenFilterFactory::new);
197193
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
198194
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
199-
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
200-
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
201195
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
202196
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
203197
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
204-
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
205-
tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
206198
tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
207199
tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
208200
tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,6 @@
6868
import org.elasticsearch.index.analysis.CharFilterFactory;
6969
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
7070
import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory;
71-
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
7271
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
7372
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
7473
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
@@ -115,6 +114,10 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
115114
filters.put("reverse", ReverseTokenFilterFactory::new);
116115
filters.put("elision", ElisionTokenFilterFactory::new);
117116
filters.put("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
117+
filters.put("limit", LimitTokenCountFilterFactory::new);
118+
filters.put("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
119+
filters.put("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
120+
filters.put("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
118121
return filters;
119122
}
120123

core/src/main/java/org/elasticsearch/index/analysis/CommonGramsTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonGramsTokenFilterFactory.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.CharArraySet;
2323
import org.apache.lucene.analysis.TokenStream;
@@ -26,6 +26,8 @@
2626
import org.elasticsearch.common.settings.Settings;
2727
import org.elasticsearch.env.Environment;
2828
import org.elasticsearch.index.IndexSettings;
29+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
30+
import org.elasticsearch.index.analysis.Analysis;
2931

3032
public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
3133

@@ -35,14 +37,17 @@ public class CommonGramsTokenFilterFactory extends AbstractTokenFilterFactory {
3537

3638
private final boolean queryMode;
3739

38-
public CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
40+
CommonGramsTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3941
super(indexSettings, name, settings);
40-
this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "ignore_case", false, deprecationLogger);
41-
this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(), "query_mode", false, deprecationLogger);
42+
this.ignoreCase = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
43+
"ignore_case", false, deprecationLogger);
44+
this.queryMode = settings.getAsBooleanLenientForPreEs6Indices(indexSettings.getIndexVersionCreated(),
45+
"query_mode", false, deprecationLogger);
4246
this.words = Analysis.parseCommonWords(env, settings, null, ignoreCase);
4347

4448
if (this.words == null) {
45-
throw new IllegalArgumentException("missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
49+
throw new IllegalArgumentException(
50+
"missing or empty [common_words] or [common_words_path] configuration for common_grams token filter");
4651
}
4752
}
4853

core/src/main/java/org/elasticsearch/index/analysis/LimitTokenCountFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LimitTokenCountFilterFactory.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,24 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.TokenStream;
2323
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
2424
import org.elasticsearch.common.settings.Settings;
2525
import org.elasticsearch.env.Environment;
2626
import org.elasticsearch.index.IndexSettings;
27+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2728

2829
public class LimitTokenCountFilterFactory extends AbstractTokenFilterFactory {
2930

30-
public static final int DEFAULT_MAX_TOKEN_COUNT = 1;
31-
public static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
31+
static final int DEFAULT_MAX_TOKEN_COUNT = 1;
32+
static final boolean DEFAULT_CONSUME_ALL_TOKENS = false;
3233

33-
final int maxTokenCount;
34-
final boolean consumeAllTokens;
34+
private final int maxTokenCount;
35+
private final boolean consumeAllTokens;
3536

36-
public LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
37+
LimitTokenCountFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3738
super(indexSettings, name, settings);
3839
this.maxTokenCount = settings.getAsInt("max_token_count", DEFAULT_MAX_TOKEN_COUNT);
3940
this.consumeAllTokens = settings.getAsBooleanLenientForPreEs6Indices(

core/src/main/java/org/elasticsearch/index/analysis/PatternCaptureGroupTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternCaptureGroupTokenFilterFactory.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
* specific language governing permissions and limitations
1717
* under the License.
1818
*/
19-
package org.elasticsearch.index.analysis;
19+
package org.elasticsearch.analysis.common;
2020

2121

2222
import org.apache.lucene.analysis.TokenFilter;
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.common.settings.Settings;
2626
import org.elasticsearch.env.Environment;
2727
import org.elasticsearch.index.IndexSettings;
28+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2829

2930
import java.util.regex.Pattern;
3031

@@ -34,7 +35,7 @@ public class PatternCaptureGroupTokenFilterFactory extends AbstractTokenFilterFa
3435
private static final String PATTERNS_KEY = "patterns";
3536
private static final String PRESERVE_ORIG_KEY = "preserve_original";
3637

37-
public PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
38+
PatternCaptureGroupTokenFilterFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
3839
super(indexSettings, name, settings);
3940
String[] regexes = settings.getAsArray(PATTERNS_KEY, null, false);
4041
if (regexes == null) {

modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceCharFilterFactory.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ public class PatternReplaceCharFilterFactory extends AbstractCharFilterFactory i
3535
private final Pattern pattern;
3636
private final String replacement;
3737

38-
public PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
38+
PatternReplaceCharFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
3939
super(indexSettings, name);
4040

4141
String sPattern = settings.get("pattern");

core/src/main/java/org/elasticsearch/index/analysis/PatternReplaceTokenFilterFactory.java renamed to modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternReplaceTokenFilterFactory.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,14 +17,15 @@
1717
* under the License.
1818
*/
1919

20-
package org.elasticsearch.index.analysis;
20+
package org.elasticsearch.analysis.common;
2121

2222
import org.apache.lucene.analysis.TokenStream;
2323
import org.apache.lucene.analysis.pattern.PatternReplaceFilter;
2424
import org.elasticsearch.common.regex.Regex;
2525
import org.elasticsearch.common.settings.Settings;
2626
import org.elasticsearch.env.Environment;
2727
import org.elasticsearch.index.IndexSettings;
28+
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
2829

2930
import java.util.regex.Pattern;
3031

modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,11 @@ protected Map<String, Class<?>> getTokenFilters() {
101101
filters.put("reversestring", ReverseTokenFilterFactory.class);
102102
filters.put("elision", ElisionTokenFilterFactory.class);
103103
filters.put("truncate", TruncateTokenFilterFactory.class);
104+
filters.put("limittokencount", LimitTokenCountFilterFactory.class);
105+
filters.put("commongrams", CommonGramsTokenFilterFactory.class);
106+
filters.put("commongramsquery", CommonGramsTokenFilterFactory.class);
107+
filters.put("patternreplace", PatternReplaceTokenFilterFactory.class);
108+
filters.put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class);
104109
return filters;
105110
}
106111

0 commit comments

Comments
 (0)