From 5b86b7f6268ee4f1d8a1005a6b369676be587719 Mon Sep 17 00:00:00 2001
From: Armin
Date: Sat, 14 Jul 2018 16:15:27 +0200
Subject: [PATCH 1/3] Replace TokenizerFactory with Supplier<Tokenizer>

Handles TODOs from https://github.com/elastic/elasticsearch/pull/24869

* Replaces all occurrences of TokenizerFactory with Supplier<Tokenizer>
* Removes the now-unused name parameter from the factory constructor
---
 .../common/CharGroupTokenizerFactory.java     |  4 +--
 .../common/ClassicTokenizerFactory.java       |  4 +--
 .../analysis/common/CommonAnalysisPlugin.java |  7 ++--
 .../common/EdgeNGramTokenizerFactory.java     |  4 +--
 .../common/KeywordTokenizerFactory.java       |  4 +--
 .../common/LetterTokenizerFactory.java        |  4 +--
 .../common/LowerCaseTokenizerFactory.java     |  4 +--
 .../common/NGramTokenizerFactory.java         |  4 +--
 .../common/PathHierarchyTokenizerFactory.java |  4 +--
 .../common/PatternTokenizerFactory.java       |  4 +--
 .../SimplePatternSplitTokenizerFactory.java   |  4 +--
 .../common/SimplePatternTokenizerFactory.java |  4 +--
 .../analysis/common/ThaiTokenizerFactory.java |  4 +--
 .../common/UAX29URLEmailTokenizerFactory.java |  4 +--
 .../common/WhitespaceTokenizerFactory.java    |  4 +--
 .../CharGroupTokenizerFactoryTests.java       |  6 ++--
 .../common/NGramTokenizerFactoryTests.java    | 16 +++++-----
 .../PathHierarchyTokenizerFactoryTests.java   | 14 ++++----
 .../WhitespaceTokenizerFactoryTests.java      | 10 +++---
 .../index/analysis/IcuTokenizerFactory.java   |  4 +--
 .../analysis/icu/AnalysisICUPlugin.java       |  5 +--
 .../analysis/IcuTokenizerFactoryTests.java    | 14 ++++----
 .../analysis/SimpleIcuAnalysisTests.java      |  4 ++-
 .../analysis/KuromojiTokenizerFactory.java    |  4 +--
 .../kuromoji/AnalysisKuromojiPlugin.java      |  5 +--
 .../index/analysis/KuromojiAnalysisTests.java | 21 ++++++------
 .../index/analysis/NoriTokenizerFactory.java  |  4 +--
 .../analysis/nori/AnalysisNoriPlugin.java     |  5 +--
 .../index/analysis/NoriAnalysisTests.java     |  5 +--
 ...SmartChineseTokenizerTokenizerFactory.java |  4 +--
 .../smartcn/AnalysisSmartChinesePlugin.java   |  7 ++--
 .../SimpleSmartChineseAnalysisTests.java      |  4 ++-
 .../analyze/TransportAnalyzeAction.java       | 32 +++++++++----------
 .../analysis/AbstractTokenizerFactory.java    |  8 +++--
 .../index/analysis/AnalysisRegistry.java      | 32 ++++++++++---------
 .../index/analysis/CustomAnalyzer.java        | 13 ++++----
 .../analysis/CustomAnalyzerProvider.java      |  8 +++--
 .../analysis/CustomNormalizerProvider.java    |  6 ++--
 .../analysis/PreConfiguredTokenizer.java      | 16 +++++-----
 .../analysis/StandardTokenizerFactory.java    |  4 +--
 .../index/analysis/TokenizerFactory.java      | 26 ---------------
 .../indices/analysis/AnalysisModule.java      |  9 +++---
 .../elasticsearch/plugins/AnalysisPlugin.java |  4 +--
 .../indices/TransportAnalyzeActionTests.java  |  5 +--
 .../index/analysis/CustomNormalizerTests.java |  4 ++-
 .../index/mapper/KeywordFieldMapperTests.java | 17 +++-------
 .../indices/analysis/AnalysisModuleTests.java |  4 +--
 .../org/elasticsearch/test/ESTestCase.java    |  6 ++--
 .../elasticsearch/test/MockKeywordPlugin.java | 16 +++-------
 .../config/CategorizationAnalyzerConfig.java  | 15 +++++----
 .../core/LocalStateCompositeXPackPlugin.java  |  6 ++--
 .../xpack/ml/MachineLearning.java             |  4 +--
 .../MlClassicTokenizerFactory.java            |  4 +--
 53 files changed, 210 insertions(+), 224 deletions(-)
 delete mode 100644 server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java
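
Every tokenizer factory below changes in the same mechanical way, so here is a
minimal sketch of the new contract. It assumes, as the signatures in the diffs
below indicate, that AbstractTokenizerFactory now implements
java.util.function.Supplier<Tokenizer>; MyTokenizerFactory is a hypothetical
name used only to illustrate the pattern.

    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.elasticsearch.common.settings.Settings;
    import org.elasticsearch.env.Environment;
    import org.elasticsearch.index.IndexSettings;
    import org.elasticsearch.index.analysis.AbstractTokenizerFactory;

    // Hypothetical factory illustrating the before/after shape of this refactoring.
    public class MyTokenizerFactory extends AbstractTokenizerFactory {

        // The four-argument constructor shape is kept so that constructor
        // references like MyTokenizerFactory::new still satisfy AnalysisProvider,
        // but the name parameter is no longer forwarded to super().
        MyTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
            super(indexSettings, settings);  // was: super(indexSettings, name, settings)
        }

        @Override
        public Tokenizer get() {             // was: public Tokenizer create()
            return new WhitespaceTokenizer();
        }
    }
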
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java
index d4e1e794a309b..3614574d8927a 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java
@@ -39,7 +39,7 @@ public class CharGroupTokenizerFactory extends AbstractTokenizerFactory{
     private boolean tokenizeOnSymbol = false;
 
     public CharGroupTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
 
         for (final String c : settings.getAsList("tokenize_on_chars")) {
             if (c == null || c.length() == 0) {
@@ -109,7 +109,7 @@ private char parseEscapedChar(final String s) {
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new CharTokenizer() {
             @Override
             protected boolean isTokenChar(int c) {
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java
index e81f6b88d248c..6e396d6ad2efd 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java
@@ -35,12 +35,12 @@ public class ClassicTokenizerFactory extends AbstractTokenizerFactory {
     private final int maxTokenLength;
 
     ClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         ClassicTokenizer tokenizer = new ClassicTokenizer();
         tokenizer.setMaxTokenLength(maxTokenLength);
         return tokenizer;
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
index d95af920a307b..60cda6ce5f09b 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java
@@ -24,6 +24,7 @@
 import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.ar.ArabicAnalyzer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
 import org.apache.lucene.analysis.ar.ArabicStemFilter;
@@ -122,7 +123,6 @@
 import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
 import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
 import org.elasticsearch.plugins.AnalysisPlugin;
@@ -134,6 +134,7 @@
 import java.util.List;
 import java.util.Map;
 import java.util.TreeMap;
+import java.util.function.Supplier;
 
 import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings;
 
@@ -262,8 +263,8 @@ public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() {
     }
 
     @Override
-    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
-        Map<String, AnalysisProvider<TokenizerFactory>> tokenizers = new TreeMap<>();
+    public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() {
+        Map<String, AnalysisProvider<Supplier<Tokenizer>>> tokenizers = new TreeMap<>();
         tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new);
         tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new);
         tokenizers.put("thai", ThaiTokenizerFactory::new);
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java
index 55a527cc792c8..d58316055fba3 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java
@@ -36,14 +36,14 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory {
     private final CharMatcher matcher;
 
     EdgeNGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
         this.matcher = parseTokenChars(settings.getAsList("token_chars"));
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         if (matcher == null) {
             return new EdgeNGramTokenizer(minGram, maxGram);
         } else {
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java
index abe88462cb996..f174ea4b48d0d 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java
@@ -31,12 +31,12 @@ public class KeywordTokenizerFactory extends AbstractTokenizerFactory {
     private final int bufferSize;
 
     KeywordTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         bufferSize = settings.getAsInt("buffer_size", 256);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new KeywordTokenizer(bufferSize);
     }
 }
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java
index be98eb73a9cad..6c92c13d2b126 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java
@@ -29,11 +29,11 @@ public class LetterTokenizerFactory extends AbstractTokenizerFactory {
 
     LetterTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new LetterTokenizer();
     }
 }
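
The CommonAnalysisPlugin hunk above shows the plugin-facing half of the change:
getTokenizers() now returns providers of Supplier<Tokenizer> instead of
providers of TokenizerFactory. A hedged sketch of what a third-party
AnalysisPlugin would return under the new signature (the plugin class and the
"my_tokenizer" key are hypothetical; MyTokenizerFactory is the sketch from the
cover notes above):

    import java.util.Map;
    import java.util.TreeMap;
    import java.util.function.Supplier;

    import org.apache.lucene.analysis.Tokenizer;
    import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
    import org.elasticsearch.plugins.AnalysisPlugin;
    import org.elasticsearch.plugins.Plugin;

    public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
        @Override
        public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() {
            // was: Map<String, AnalysisProvider<TokenizerFactory>>
            Map<String, AnalysisProvider<Supplier<Tokenizer>>> tokenizers = new TreeMap<>();
            tokenizers.put("my_tokenizer", MyTokenizerFactory::new);
            return tokenizers;
        }
    }
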
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java
index 8f0c5f759aa64..6b3eb513d0f90 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java
@@ -30,11 +30,11 @@ public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implements MultiTermAwareComponent {
 
     LowerCaseTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new LowerCaseTokenizer();
     }
 
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java
index b67f67cb2fa75..903e854876449 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java
@@ -85,7 +85,7 @@ static CharMatcher parseTokenChars(List<String> characterClasses) {
     }
 
     NGramTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         int maxAllowedNgramDiff = indexSettings.getMaxNgramDiff();
         this.minGram = settings.getAsInt("min_gram", NGramTokenizer.DEFAULT_MIN_NGRAM_SIZE);
         this.maxGram = settings.getAsInt("max_gram", NGramTokenizer.DEFAULT_MAX_NGRAM_SIZE);
@@ -105,7 +105,7 @@ static CharMatcher parseTokenChars(List<String> characterClasses) {
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         if (matcher == null) {
             return new NGramTokenizer(minGram, maxGram);
         } else {
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java
index c877fe6944e5b..e89ecddbd1187 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java
@@ -37,7 +37,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
     private final boolean reverse;
 
     PathHierarchyTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         bufferSize = settings.getAsInt("buffer_size", 1024);
         String delimiter = settings.get("delimiter");
         if (delimiter == null) {
@@ -61,7 +61,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory {
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         if (reverse) {
             return new ReversePathHierarchyTokenizer(bufferSize, delimiter, replacement, skip);
         }
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java
index f850b68ac9829..ba4a27d29d09e 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java
@@ -35,7 +35,7 @@ public class PatternTokenizerFactory extends AbstractTokenizerFactory {
     private final int group;
 
     PatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
 
         String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/);
         if (sPattern == null) {
@@ -47,7 +47,7 @@ public class PatternTokenizerFactory extends AbstractTokenizerFactory {
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new PatternTokenizer(pattern, group);
     }
 }
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java
index f861ec3792f5e..8846a217d1232 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java
@@ -31,13 +31,13 @@ public class SimplePatternSplitTokenizerFactory extends AbstractTokenizerFactory
     private final String pattern;
 
     public SimplePatternSplitTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
 
         pattern = settings.get("pattern", "");
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new SimplePatternSplitTokenizer(pattern);
     }
 }
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java
index 6db3cfa67a318..ea25d6e404003 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java
@@ -31,13 +31,13 @@ public class SimplePatternTokenizerFactory extends AbstractTokenizerFactory {
     private final String pattern;
 
     public SimplePatternTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
 
         pattern = settings.get("pattern", "");
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new SimplePatternTokenizer(pattern);
     }
 }
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java
index b76aca42d36ee..cf551f3422eb8 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java
@@ -32,11 +32,11 @@ public class ThaiTokenizerFactory extends AbstractTokenizerFactory {
 
     ThaiTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new ThaiTokenizer();
     }
 }
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java
index 8040c88ea7fa5..075d6f88ab838 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java
@@ -32,12 +32,12 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory {
     private final int maxTokenLength;
 
     UAX29URLEmailTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer();
         tokenizer.setMaxTokenLength(maxTokenLength);
         return tokenizer;
diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java
index 1f89d4688136f..548cd2e9f810c 100644
--- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java
+++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java
@@ -34,12 +34,12 @@ public class WhitespaceTokenizerFactory extends AbstractTokenizerFactory {
     private Integer maxTokenLength;
 
     WhitespaceTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         maxTokenLength = settings.getAsInt(MAX_TOKEN_LENGTH, StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new WhitespaceTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, maxTokenLength);
     }
 }
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java
index 1447531aa8731..f2ccff468c5e3 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java
@@ -43,7 +43,7 @@ public void testParseTokenChars() {
                 new String[] { "commas" },
                 new String[] { "a", "b", "c", "\\$" })) {
             final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf).build();
-            expectThrows(RuntimeException.class, () -> new CharGroupTokenizerFactory(indexProperties, null, name, settings).create());
+            expectThrows(RuntimeException.class, () -> new CharGroupTokenizerFactory(indexProperties, null, name, settings).get());
         }
 
         for (String[] conf : Arrays.asList(
@@ -56,7 +56,7 @@ public void testParseTokenChars() {
                 new String[] { "\\r" },
                 new String[] { "f", "o", "o", "symbol" })) {
             final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", Arrays.asList(conf)).build();
-            new CharGroupTokenizerFactory(indexProperties, null, name, settings).create();
+            new CharGroupTokenizerFactory(indexProperties, null, name, settings).get();
             // no exception
         }
     }
@@ -67,7 +67,7 @@ public void testTokenization() throws IOException {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", "whitespace", ":", "\\u0024").build();
         Tokenizer tokenizer = new CharGroupTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings),
-            null, name, settings).create();
+            null, name, settings).get();
         tokenizer.setReader(new StringReader("foo bar $34 test:test2"));
         assertTokenStreamContents(tokenizer, new String[] {"foo", "bar", "34", "test", "test2"});
     }
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
index 1cf6ef4696d04..68da0aae933db 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java
@@ -51,7 +51,7 @@ public void testParseTokenChars() {
             final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
                 .put("token_chars", tokenChars).build();
             try {
-                new NGramTokenizerFactory(indexProperties, null, name, settings).create();
+                new NGramTokenizerFactory(indexProperties, null, name, settings).get();
                 fail();
             } catch (IllegalArgumentException expected) {
                 // OK
@@ -62,7 +62,7 @@ public void testParseTokenChars() {
                 .put("token_chars", tokenChars).build();
             indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings);
 
-            new NGramTokenizerFactory(indexProperties, null, name, settings).create();
+            new NGramTokenizerFactory(indexProperties, null, name, settings).get();
             // no exception
         }
     }
@@ -75,7 +75,7 @@ public void testNoTokenChars() throws IOException {
         final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4)
             .putList("token_chars", new String[0]).build();
         Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
-            .create();
+            .get();
         tokenizer.setReader(new StringReader("1.34"));
         assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"});
     }
@@ -88,13 +88,13 @@ public void testPreTokenization() throws IOException {
         Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
             .put("token_chars", "letter,digit").build();
         Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
-            .create();
+            .get();
         tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
         assertTokenStreamContents(tokenizer,
             new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"});
         settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
             .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
-        tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+        tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).get();
         tokenizer.setReader(new StringReader(" a!$ 9"));
         assertTokenStreamContents(tokenizer,
             new String[] {" a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9"});
@@ -107,14 +107,14 @@ public void testPreTokenizationEdge() throws IOException {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build();
         Tokenizer tokenizer =
-            new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create();
+            new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).get();
         tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f "));
         assertTokenStreamContents(tokenizer,
             new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"});
         settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3)
             .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build();
         tokenizer = new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings)
-            .create();
+            .get();
         tokenizer.setReader(new StringReader(" a!$ 9"));
         assertTokenStreamContents(tokenizer,
             new String[] {" a", " a!"});
@@ -163,7 +163,7 @@ public void testMaxNGramDiffException() throws Exception{
         final Settings settings = newAnalysisSettingsBuilder().put("min_gram", min_gram).put("max_gram", max_gram).build();
 
         IllegalArgumentException ex = expectThrows(IllegalArgumentException.class,
-            () -> new NGramTokenizerFactory(indexProperties, null, name, settings).create());
+            () -> new NGramTokenizerFactory(indexProperties, null, name, settings).get());
         assertEquals(
             "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: [" + maxAllowedNgramDiff
                 + "] but was [" + ngramDiff + "]. This limit can be set by changing the ["
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java
index 0b545d3355201..91e3990a4f317 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java
@@ -36,7 +36,7 @@ public void testDefaults() throws IOException {
         final Index index = new Index("test", "_na_");
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-            "path-hierarchy-tokenizer", Settings.EMPTY).create();
+            "path-hierarchy-tokenizer", Settings.EMPTY).get();
         tokenizer.setReader(new StringReader("/one/two/three"));
         assertTokenStreamContents(tokenizer, new String[] {"/one", "/one/two", "/one/two/three"});
     }
@@ -46,7 +46,7 @@ public void testReverse() throws IOException {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("reverse", true).build();
         Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-            "path-hierarchy-tokenizer", settings).create();
+            "path-hierarchy-tokenizer", settings).get();
         tokenizer.setReader(new StringReader("/one/two/three"));
         assertTokenStreamContents(tokenizer, new String[] {"/one/two/three", "one/two/three", "two/three", "three"});
     }
@@ -56,7 +56,7 @@ public void testDelimiter() throws IOException {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("delimiter", "-").build();
         Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-            "path-hierarchy-tokenizer", settings).create();
+            "path-hierarchy-tokenizer", settings).get();
         tokenizer.setReader(new StringReader("/one/two/three"));
         assertTokenStreamContents(tokenizer, new String[] {"/one/two/three"});
         tokenizer.setReader(new StringReader("one-two-three"));
@@ -68,7 +68,7 @@ public void testReplace() throws IOException {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("replacement", "-").build();
         Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-            "path-hierarchy-tokenizer", settings).create();
+            "path-hierarchy-tokenizer", settings).get();
         tokenizer.setReader(new StringReader("/one/two/three"));
         assertTokenStreamContents(tokenizer, new String[] {"-one", "-one-two", "-one-two-three"});
         tokenizer.setReader(new StringReader("one-two-three"));
@@ -80,7 +80,7 @@ public void testSkip() throws IOException {
         final Settings indexSettings = newAnalysisSettingsBuilder().build();
         Settings settings = newAnalysisSettingsBuilder().put("skip", 2).build();
         Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-            "path-hierarchy-tokenizer", settings).create();
+            "path-hierarchy-tokenizer", settings).get();
         tokenizer.setReader(new StringReader("/one/two/three/four/five"));
         assertTokenStreamContents(tokenizer, new String[] {"/three", "/three/four", "/three/four/five"});
     }
@@ -93,7 +93,7 @@ public void testDelimiterExceptions() {
             Settings settings = newAnalysisSettingsBuilder().put("delimiter", delimiter).build();
             IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                 () -> new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-                    "path-hierarchy-tokenizer", settings).create());
+                    "path-hierarchy-tokenizer", settings).get());
             assertEquals("delimiter must be a one char value", e.getMessage());
         }
         {
@@ -101,7 +101,7 @@ public void testDelimiterExceptions() {
             Settings settings = newAnalysisSettingsBuilder().put("replacement", replacement).build();
             IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
                 () -> new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null,
-                    "path-hierarchy-tokenizer", settings).create());
+                    "path-hierarchy-tokenizer", settings).get());
             assertEquals("replacement must be a one char value", e.getMessage());
         }
     }
diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java
index f34b694fbf60f..6852687009053 100644
--- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java
+++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java
@@ -42,7 +42,7 @@ public void testSimpleWhiteSpaceTokenizer() throws IOException {
         final Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build();
         IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings);
         WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen",
-            Settings.EMPTY).create();
+            Settings.EMPTY).get();
 
         try (Reader reader = new StringReader("one, two, three")) {
             tokenizer.setReader(reader);
@@ -55,7 +55,7 @@ public void testMaxTokenLength() throws IOException {
         IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings);
         final Settings settings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build();
         WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen",
-            settings).create();
+            settings).get();
         try (Reader reader = new StringReader("one, two, three")) {
             tokenizer.setReader(reader);
             assertTokenStreamContents(tokenizer, new String[] { "on", "e,", "tw", "o,", "th", "re", "e" });
@@ -63,7 +63,7 @@ public void testMaxTokenLength() throws IOException {
 
         final Settings defaultSettings = Settings.EMPTY;
         tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", defaultSettings)
-            .create();
+            .get();
         String veryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256);
         try (Reader reader = new StringReader(veryLongToken)) {
             tokenizer.setReader(reader);
@@ -72,12 +72,12 @@ public void testMaxTokenLength() throws IOException {
 
         final Settings tooLongSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build();
         IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
-                () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).create());
+                () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).get());
         assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", e.getMessage());
 
         final Settings negativeSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1).build();
         e = expectThrows(IllegalArgumentException.class,
-                () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).create());
+                () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).get());
         assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage());
     }
 }
diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java
index 84c611c0f8132..213ecc223dbfe 100644
--- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java
+++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java
@@ -47,12 +47,12 @@ public class IcuTokenizerFactory extends AbstractTokenizerFactory {
     private static final String RULE_FILES = "rule_files";
 
     public IcuTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         config = getIcuConfig(environment, settings);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         if (config == null) {
             return new ICUTokenizer();
         }else{
diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java
index 58ebdc8e2a801..7322199919c43 100644
--- a/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java
+++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java
@@ -21,6 +21,7 @@
 
 import static java.util.Collections.singletonMap;
 
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.common.io.stream.NamedWriteableRegistry;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.IcuCollationTokenFilterFactory;
@@ -30,7 +31,6 @@
 import org.elasticsearch.index.analysis.IcuTokenizerFactory;
 import org.elasticsearch.index.analysis.IcuTransformTokenFilterFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.index.mapper.ICUCollationKeywordFieldMapper;
 import org.elasticsearch.index.mapper.Mapper;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
@@ -43,6 +43,7 @@
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
+import java.util.function.Supplier;
 
 public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin, MapperPlugin {
     @Override
@@ -61,7 +62,7 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
     }
 
     @Override
-    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
+    public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() {
         return singletonMap("icu_tokenizer", IcuTokenizerFactory::new);
     }
 
diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java
index 8cce4c13542c6..9d615b15ae7ca 100644
--- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java
+++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.analysis;
 
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
 import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -34,6 +35,7 @@
 import java.io.StringReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.function.Supplier;
 
 import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents;
 
@@ -42,8 +44,8 @@ public class IcuTokenizerFactoryTests extends ESTestCase {
     public void testSimpleIcuTokenizer() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
 
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("icu_tokenizer");
-        ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create();
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("icu_tokenizer");
+        ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.get();
 
         Reader reader = new StringReader("向日葵, one-two");
         tokenizer.setReader(reader);
@@ -54,8 +56,8 @@ public void testIcuCustomizeRuleFile() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
 
         // test the tokenizer with single rule file
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("user_rule_tokenizer");
-        ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create();
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("user_rule_tokenizer");
+        ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.get();
         Reader reader = new StringReader
             ("One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish.");
 
@@ -69,8 +71,8 @@ public void testMultipleIcuCustomizeRuleFiles() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
 
         // test the tokenizer with two rule files
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("multi_rule_tokenizer");
-        ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create();
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("multi_rule_tokenizer");
+        ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.get();
         StringReader reader = new StringReader
             ("Some English. Немного русский. ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English.");
 
diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java
index 3cd675c221a27..edff9e86d4da0 100644
--- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java
+++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java
@@ -19,12 +19,14 @@
 
 package org.elasticsearch.index.analysis;
 
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin;
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
+import java.util.function.Supplier;
 
 import static org.hamcrest.Matchers.instanceOf;
 
@@ -32,7 +34,7 @@ public class SimpleIcuAnalysisTests extends ESTestCase {
     public void testDefaultsIcuAnalysis() throws IOException {
         TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisICUPlugin());
 
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("icu_tokenizer");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("icu_tokenizer");
         assertThat(tokenizerFactory, instanceOf(IcuTokenizerFactory.class));
 
         TokenFilterFactory filterFactory = analysis.tokenFilter.get("icu_normalizer");
diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
index 2f00e68a75ebc..c4f927b4d5a09 100644
--- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
+++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java
@@ -45,7 +45,7 @@ public class KuromojiTokenizerFactory extends AbstractTokenizerFactory {
     private boolean discartPunctuation;
 
     public KuromojiTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         mode = getMode(settings);
         userDictionary = getUserDictionary(env, settings);
         discartPunctuation = settings.getAsBoolean("discard_punctuation", true);
@@ -86,7 +86,7 @@ public static JapaneseTokenizer.Mode getMode(Settings settings) {
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         JapaneseTokenizer t = new JapaneseTokenizer(userDictionary, discartPunctuation, mode);
         int nBestCost = this.nBestCost;
         if (nBestExamples != null) {
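
The test updates in this patch all follow the pattern visible above: the test
registry now hands back a Supplier<Tokenizer>, the instanceOf assertion against
the concrete factory class still holds (the supplier is the factory instance),
and create() becomes get(). A condensed sketch of that flow, assuming the
TestAnalysis helper from ESTestCase used throughout these tests:

    // fragment inside an ESTestCase subclass; "icu_tokenizer" as in IcuTokenizerFactoryTests above
    TestAnalysis analysis = createTestAnalysis();
    Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("icu_tokenizer");
    assertThat(tokenizerFactory, instanceOf(IcuTokenizerFactory.class));
    Tokenizer tokenizer = tokenizerFactory.get();   // was: tokenizerFactory.create()
    tokenizer.setReader(new StringReader("向日葵, one-two"));
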
diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java
index c4b4db53c4a2c..e662e11517d0d 100644
--- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java
+++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java
@@ -20,6 +20,7 @@
 package org.elasticsearch.plugin.analysis.kuromoji;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.CharFilterFactory;
 import org.elasticsearch.index.analysis.JapaneseStopTokenFilterFactory;
@@ -32,13 +33,13 @@
 import org.elasticsearch.index.analysis.KuromojiReadingFormFilterFactory;
 import org.elasticsearch.index.analysis.KuromojiTokenizerFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.function.Supplier;
 
 import static java.util.Collections.singletonMap;
 
@@ -61,7 +62,7 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
     }
 
     @Override
-    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
+    public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() {
         return singletonMap("kuromoji_tokenizer", KuromojiTokenizerFactory::new);
     }
 
diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java
index b1b23f7f1b6a4..df950237d5030 100644
--- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java
+++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java
@@ -38,6 +38,7 @@
 import java.io.StringReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.function.Supplier;
 
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
@@ -48,7 +49,7 @@ public class KuromojiAnalysisTests extends ESTestCase {
     public void testDefaultsKuromojiAnalysis() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
 
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_tokenizer");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("kuromoji_tokenizer");
         assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class));
 
         TokenFilterFactory filterFactory = analysis.tokenFilter.get("kuromoji_part_of_speech");
@@ -245,11 +246,11 @@ private String readFully(Reader reader) throws IOException {
     public void testKuromojiUserDict() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
 
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_user_dict");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("kuromoji_user_dict");
         String source = "私は制限スピードを超える。";
         String[] expected = new String[]{"私", "は", "制限スピード", "を", "超える"};
 
-        Tokenizer tokenizer = tokenizerFactory.create();
+        Tokenizer tokenizer = tokenizerFactory.get();
         tokenizer.setReader(new StringReader(source));
         assertSimpleTSOutput(tokenizer, expected);
     }
@@ -257,39 +258,39 @@ public void testKuromojiUserDict() throws IOException {
     // fix #59
     public void testKuromojiEmptyUserDict() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_empty_user_dict");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("kuromoji_empty_user_dict");
         assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class));
     }
 
     public void testNbestCost() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_cost");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_cost");
         String source = "鳩山積み";
         String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"};
 
-        Tokenizer tokenizer = tokenizerFactory.create();
+        Tokenizer tokenizer = tokenizerFactory.get();
         tokenizer.setReader(new StringReader(source));
         assertSimpleTSOutput(tokenizer, expected);
     }
 
     public void testNbestExample() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_examples");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_examples");
         String source = "鳩山積み";
         String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"};
 
-        Tokenizer tokenizer = tokenizerFactory.create();
+        Tokenizer tokenizer = tokenizerFactory.get();
         tokenizer.setReader(new StringReader(source));
         assertSimpleTSOutput(tokenizer, expected);
     }
 
     public void testNbestBothOptions() throws IOException {
         TestAnalysis analysis = createTestAnalysis();
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_both");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_both");
         String source = "鳩山積み";
         String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"};
 
-        Tokenizer tokenizer = tokenizerFactory.create();
+        Tokenizer tokenizer = tokenizerFactory.get();
         tokenizer.setReader(new StringReader(source));
         assertSimpleTSOutput(tokenizer, expected);
 
diff --git a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java
index 346cc84e5e6b4..60ae7dc8bd660 100644
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java
@@ -38,7 +38,7 @@ public class NoriTokenizerFactory extends AbstractTokenizerFactory {
     private final KoreanTokenizer.DecompoundMode decompoundMode;
 
     public NoriTokenizerFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
         decompoundMode = getMode(settings);
         userDictionary = getUserDictionary(env, settings);
     }
@@ -65,7 +65,7 @@ public static KoreanTokenizer.DecompoundMode getMode(Settings settings) {
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new KoreanTokenizer(KoreanTokenizer.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, decompoundMode, false);
     }
 
diff --git a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java
index 6e9baa7acd26c..4fc9a93fab96f 100644
--- a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java
+++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java
@@ -20,19 +20,20 @@
 package org.elasticsearch.plugin.analysis.nori;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.NoriAnalyzerProvider;
 import org.elasticsearch.index.analysis.NoriPartOfSpeechStopFilterFactory;
 import org.elasticsearch.index.analysis.NoriReadingFormFilterFactory;
 import org.elasticsearch.index.analysis.NoriTokenizerFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.function.Supplier;
 
 import static java.util.Collections.singletonMap;
 
@@ -46,7 +47,7 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
     }
 
     @Override
-    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
+    public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() {
         return singletonMap("nori_tokenizer", NoriTokenizerFactory::new);
     }
 
diff --git a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java
index fa5858a7bbbb8..a2202744440e1 100644
--- a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java
+++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java
@@ -37,6 +37,7 @@
 import java.io.StringReader;
 import java.nio.file.Files;
 import java.nio.file.Path;
+import java.util.function.Supplier;
 
 import static org.hamcrest.Matchers.instanceOf;
 
@@ -44,7 +45,7 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase {
     public void testDefaultsNoriAnalysis() throws IOException {
         TestAnalysis analysis = createTestAnalysis(Settings.EMPTY);
 
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("nori_tokenizer");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("nori_tokenizer");
         assertThat(tokenizerFactory, instanceOf(NoriTokenizerFactory.class));
 
         TokenFilterFactory filterFactory = analysis.tokenFilter.get("nori_part_of_speech");
@@ -97,7 +98,7 @@ public void testNoriTokenizer() throws Exception {
             .put("index.analysis.tokenizer.my_tokenizer.decompound_mode", "mixed")
             .build();
         TestAnalysis analysis = createTestAnalysis(settings);
-        Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").create();
+        Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").get();
         tokenizer.setReader(new StringReader("뿌리가 깊은 나무"));
         assertTokenStreamContents(tokenizer, new String[] {"뿌리", "가", "깊", "은", "나무"});
         tokenizer.setReader(new StringReader("가늠표"));
diff --git a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java
index 9d38729615205..832e914bb2dda 100644
--- a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java
+++ b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java
@@ -28,11 +28,11 @@ public class SmartChineseTokenizerTokenizerFactory extends AbstractTokenizerFactory {
 
     public SmartChineseTokenizerTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) {
-        super(indexSettings, name, settings);
+        super(indexSettings, settings);
     }
 
     @Override
-    public Tokenizer create() {
+    public Tokenizer get() {
         return new HMMChineseTokenizer();
     }
 }
diff --git a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java
index b11a157c149d6..ad0b99b702c1c 100644
--- a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java
+++ b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java
@@ -20,18 +20,19 @@
 package org.elasticsearch.plugin.analysis.smartcn;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.index.analysis.AnalyzerProvider;
 import org.elasticsearch.index.analysis.SmartChineseAnalyzerProvider;
 import org.elasticsearch.index.analysis.SmartChineseNoOpTokenFilterFactory;
 import org.elasticsearch.index.analysis.SmartChineseTokenizerTokenizerFactory;
 import org.elasticsearch.index.analysis.TokenFilterFactory;
-import org.elasticsearch.index.analysis.TokenizerFactory;
 import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
 import org.elasticsearch.plugins.AnalysisPlugin;
 import org.elasticsearch.plugins.Plugin;
 
 import java.util.HashMap;
 import java.util.Map;
+import java.util.function.Supplier;
 
 import static java.util.Collections.singletonMap;
 
@@ -43,8 +44,8 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
     }
 
     @Override
-    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
-        Map<String, AnalysisProvider<TokenizerFactory>> extra = new HashMap<>();
+    public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() {
+        Map<String, AnalysisProvider<Supplier<Tokenizer>>> extra = new HashMap<>();
         extra.put("smartcn_tokenizer", SmartChineseTokenizerTokenizerFactory::new);
         // This is an alias to "smartcn_tokenizer"; it's here for backwards compat
         extra.put("smartcn_sentence", SmartChineseTokenizerTokenizerFactory::new);
diff --git a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java
index e2d6f6db51376..2bb0586b62b9d 100644
--- a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java
+++ b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.index.analysis;
 
+import org.apache.lucene.analysis.Tokenizer;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.plugin.analysis.smartcn.AnalysisSmartChinesePlugin;
@@ -26,6 +27,7 @@
 import org.hamcrest.MatcherAssert;
 
 import java.io.IOException;
+import java.util.function.Supplier;
 
 import static org.hamcrest.Matchers.instanceOf;
 
@@ -33,7 +35,7 @@ public class SimpleSmartChineseAnalysisTests extends ESTestCase {
     public void testDefaultsIcuAnalysis() throws IOException {
         final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisSmartChinesePlugin());
-        TokenizerFactory tokenizerFactory = analysis.tokenizer.get("smartcn_tokenizer");
+        Supplier<Tokenizer> tokenizerFactory = analysis.tokenizer.get("smartcn_tokenizer");
         MatcherAssert.assertThat(tokenizerFactory, instanceOf(SmartChineseTokenizerTokenizerFactory.class));
     }
 }
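
The server-side diff that follows is the consuming half of the refactoring:
everywhere TransportAnalyzeAction previously called TokenizerFactory#create()
it now calls Supplier#get(). A simplified sketch of the chain construction that
createStackedTokenStream performs below (the method name and omitted error
handling are mine; the factory APIs are the ones shown in the diff):

    import java.io.Reader;
    import java.io.StringReader;
    import java.util.function.Supplier;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.elasticsearch.index.analysis.CharFilterFactory;
    import org.elasticsearch.index.analysis.TokenFilterFactory;

    static TokenStream buildChain(String source, CharFilterFactory[] charFilters,
                                  Supplier<Tokenizer> tokenizerFactory, TokenFilterFactory[] tokenFilters) {
        Reader reader = new StringReader(source);
        for (CharFilterFactory charFilter : charFilters) {
            reader = charFilter.create(reader);       // char filters keep create(Reader)
        }
        Tokenizer tokenizer = tokenizerFactory.get(); // was: tokenizerFactory.create()
        tokenizer.setReader(reader);
        TokenStream stream = tokenizer;
        for (TokenFilterFactory tokenFilter : tokenFilters) {
            stream = tokenFilter.create(stream);      // token filters keep create(TokenStream)
        }
        return stream;
    }
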
b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index 35f1f725b65ad..43b60b2207cfb 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -53,7 +53,6 @@ import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.shard.ShardId; @@ -72,6 +71,7 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; +import java.util.function.Supplier; /** * Transport action used to execute analyze requests @@ -185,7 +185,7 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal } } else if (request.tokenizer() != null) { final IndexSettings indexSettings = indexAnalyzers == null ? null : indexAnalyzers.getIndexSettings(); - Tuple tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, + Tuple> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment); List charFilterFactoryList = @@ -213,7 +213,7 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true); final String keywordTokenizerName = "keyword"; - TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); + Supplier keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); List tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment, new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true); @@ -306,7 +306,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy if (customAnalyzer != null) { // customAnalyzer = divide charfilter, tokenizer tokenfilters CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters(); - TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory(); + Supplier tokenizerFactory = customAnalyzer.tokenizerFactory(); TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters(); String[][] charFiltersTexts = new String[charFilterFactories != null ? 
charFilterFactories.length : 0][request.text().length]; @@ -330,7 +330,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy } // analyzing only tokenizer - Tokenizer tokenizer = tokenizerFactory.create(); + Tokenizer tokenizer = tokenizerFactory.get(); tokenizer.setReader(reader); tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes); @@ -381,12 +381,12 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy return detailResponse; } - private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) { + private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, Supplier tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) { Reader reader = new StringReader(source); for (CharFilterFactory charFilterFactory : charFilterFactories) { reader = charFilterFactory.create(reader); } - Tokenizer tokenizer = tokenizerFactory.create(); + Tokenizer tokenizer = tokenizerFactory.get(); tokenizer.setReader(reader); TokenStream tokenStream = tokenizer; for (int i = 0; i < current; i++) { @@ -571,7 +571,7 @@ private static List parseCharFilterFactories(AnalyzeRequest r } private static List parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry, - Environment environment, Tuple tokenizerFactory, + Environment environment, Tuple> tokenizerFactory, List charFilterFactoryList, boolean normalizer) throws IOException { List tokenFilterFactoryList = new ArrayList<>(); if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { @@ -632,10 +632,10 @@ private static List parseTokenFilterFactories(AnalyzeRequest return tokenFilterFactoryList; } - private static Tuple parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers, - AnalysisRegistry analysisRegistry, Environment environment) throws IOException { + private static Tuple> parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers, + AnalysisRegistry analysisRegistry, Environment environment) throws IOException { String name; - TokenizerFactory tokenizerFactory; + Supplier tokenizerFactory; final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer(); // parse anonymous settings if (tokenizer.definition != null) { @@ -644,7 +644,7 @@ private static Tuple parseTokenizerFactory(AnalyzeRequ if (tokenizerTypeName == null) { throw new IllegalArgumentException("Missing [type] setting for anonymous tokenizer: " + tokenizer.definition); } - AnalysisModule.AnalysisProvider tokenizerFactoryFactory = + AnalysisModule.AnalysisProvider> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerTypeName); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]"); @@ -653,7 +653,7 @@ private static Tuple parseTokenizerFactory(AnalyzeRequ name = "_anonymous_tokenizer"; tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings); } else { - AnalysisModule.AnalysisProvider tokenizerFactoryFactory; + AnalysisModule.AnalysisProvider> tokenizerFactoryFactory; if (indexAnalzyers == null) { tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name); name = tokenizer.name; @@ -671,9 +671,9 
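createStackedTokenStream above is the one place the analyze API assembles an analysis chain by hand: char filters wrap the Reader, the tokenizer consumes it, and each token filter wraps the stream of the previous stage. A minimal runnable sketch of that assembly order against plain Lucene (no Elasticsearch types), with the factory call spelled as the patched tokenizerFactory.get():

    import java.io.StringReader;
    import java.util.function.Supplier;

    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class StackedStreamSketch {
        public static void main(String[] args) throws Exception {
            Supplier<Tokenizer> tokenizerFactory = WhitespaceTokenizer::new;

            Tokenizer tokenizer = tokenizerFactory.get();        // was tokenizerFactory.create()
            tokenizer.setReader(new StringReader("Foo BAR"));    // char filters would wrap this Reader first
            TokenStream stream = new LowerCaseFilter(tokenizer); // token filters wrap the stream

            CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                System.out.println(term.toString());             // prints "foo", then "bar"
            }
            stream.end();
            stream.close();
        }
    }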
@@ private static Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalyzeRequ
         return new Tuple<>(name, tokenizerFactory);
     }

-    private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException {
-        AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory;
-        TokenizerFactory tokenizerFactory;
+    private static Supplier<Tokenizer> getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException {
+        AnalysisModule.AnalysisProvider<Supplier<Tokenizer>> tokenizerFactoryFactory;
+        Supplier<Tokenizer> tokenizerFactory;
         tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name);
         if (tokenizerFactoryFactory == null) {
             throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]");
         }
diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java
index bf6b2fd7c5b47..efb4bf6b61cae 100644
--- a/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java
+++ b/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java
@@ -19,16 +19,18 @@

 package org.elasticsearch.index.analysis;

+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.Version;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.AbstractIndexComponent;
 import org.elasticsearch.index.IndexSettings;

-public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory {
+import java.util.function.Supplier;
+
+public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements Supplier<Tokenizer> {
     protected final Version version;

-    // TODO drop `String ignored` in a followup
-    public AbstractTokenizerFactory(IndexSettings indexSettings, String ignored, Settings settings) {
+    public AbstractTokenizerFactory(IndexSettings indexSettings, Settings settings) {
         super(indexSettings);
         this.version = Analysis.parseAnalysisVersion(this.indexSettings.getSettings(), settings, logger);
     }
diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java
index c61a7cf070680..bf560b2e83cb4 100644
--- a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java
+++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java
@@ -19,6 +19,7 @@
 package org.elasticsearch.index.analysis;

 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 import org.elasticsearch.core.internal.io.IOUtils;
 import org.elasticsearch.ElasticsearchException;
@@ -39,6 +40,7 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.concurrent.ConcurrentHashMap;
+import java.util.function.Supplier;
 import java.util.stream.Collectors;

 import static java.util.Collections.unmodifiableMap;

@@ -57,14 +59,14 @@ public final class AnalysisRegistry implements Closeable {
     private final Environment environment;
     private final Map<String, AnalysisProvider<CharFilterFactory>> charFilters;
     private final Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters;
-    private final Map<String, AnalysisProvider<TokenizerFactory>> tokenizers;
+    private final Map<String, AnalysisProvider<Supplier<Tokenizer>>> tokenizers;
     private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers;
     private final Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers;

     public AnalysisRegistry(Environment environment,
                             Map<String, AnalysisProvider<CharFilterFactory>> charFilters,
                             Map<String, AnalysisProvider<TokenFilterFactory>> tokenFilters,
-                            Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
+                            Map<String, AnalysisProvider<Supplier<Tokenizer>>> tokenizers,
                             Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
                             Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
                             Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
@@ -96,9
+98,9 @@ public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, } /** - * Returns a registered {@link TokenizerFactory} provider by name or null if the tokenizer was not registered + * Returns a registered {@link Tokenizer} provider by name or null if the tokenizer was not registered */ - public AnalysisModule.AnalysisProvider getTokenizerProvider(String tokenizer) { + public AnalysisModule.AnalysisProvider> getTokenizerProvider(String tokenizer) { return tokenizers.getOrDefault(tokenizer, this.prebuiltAnalysis.getTokenizerFactory(tokenizer)); } @@ -149,7 +151,7 @@ public void close() throws IOException { public IndexAnalyzers build(IndexSettings indexSettings) throws IOException { final Map charFilterFactories = buildCharFilterFactories(indexSettings); - final Map tokenizerFactories = buildTokenizerFactories(indexSettings); + final Map> tokenizerFactories = buildTokenizerFactories(indexSettings); final Map tokenFilterFactories = buildTokenFilterFactories(indexSettings); final Map> analyzierFactories = buildAnalyzerFactories(indexSettings); final Map> normalizerFactories = buildNormalizerFactories(indexSettings); @@ -180,7 +182,7 @@ public Map buildTokenFilterFactories(IndexSettings i return mappings; } - public Map buildTokenizerFactories(IndexSettings indexSettings) throws IOException { + public Map> buildTokenizerFactories(IndexSettings indexSettings) throws IOException { final Map tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER); return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.preConfiguredTokenizers); } @@ -202,14 +204,14 @@ public Map> buildNormalizerFactories(IndexSettings i } /** - * Returns a registered {@link TokenizerFactory} provider by {@link IndexSettings} - * or a registered {@link TokenizerFactory} provider by predefined name + * Returns a registered {@link Tokenizer} provider by {@link IndexSettings} + * or a registered {@link Tokenizer} provider by predefined name * or null if the tokenizer was not registered * @param tokenizer global or defined tokenizer name * @param indexSettings an index settings - * @return {@link TokenizerFactory} provider or null + * @return {@link Tokenizer} provider or null */ - public AnalysisProvider getTokenizerProvider(String tokenizer, IndexSettings indexSettings) { + public AnalysisProvider> getTokenizerProvider(String tokenizer, IndexSettings indexSettings) { final Map tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer"); if (tokenizerSettings.containsKey(tokenizer)) { Settings currentSettings = tokenizerSettings.get(tokenizer); @@ -404,7 +406,7 @@ private static class PrebuiltAnalysis implements Closeable { final Map>> analyzerProviderFactories; final Map> preConfiguredTokenFilters; - final Map> preConfiguredTokenizers; + final Map>> preConfiguredTokenizers; final Map> preConfiguredCharFilterFactories; private PrebuiltAnalysis( @@ -435,7 +437,7 @@ public AnalysisModule.AnalysisProvider getTokenFilterFactory return preConfiguredTokenFilters.get(name); } - public AnalysisModule.AnalysisProvider getTokenizerFactory(String name) { + public AnalysisModule.AnalysisProvider> getTokenizerFactory(String name) { return preConfiguredTokenizers.get(name); } @@ -453,7 +455,7 @@ public void close() throws IOException { public IndexAnalyzers build(IndexSettings indexSettings, Map> analyzerProviders, Map> normalizerProviders, - Map tokenizerFactoryFactories, + Map> tokenizerFactoryFactories, Map 
charFilterFactoryFactories, Map tokenFilterFactoryFactories) { @@ -507,7 +509,7 @@ private void processAnalyzerFactory(IndexSettings indexSettings, String name, AnalyzerProvider analyzerFactory, Map analyzers, Map tokenFilters, - Map charFilters, Map tokenizers) { + Map charFilters, Map> tokenizers) { /* * Lucene defaults positionIncrementGap to 0 in all analyzers but * Elasticsearch defaults them to 0 only before version 2.0 @@ -557,7 +559,7 @@ private void processNormalizerFactory( AnalyzerProvider normalizerFactory, Map normalizers, String tokenizerName, - TokenizerFactory tokenizerFactory, + Supplier tokenizerFactory, Map tokenFilters, Map charFilters) { if (tokenizerFactory == null) { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java index d70b4628f532c..533c7fbf964fd 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java @@ -24,11 +24,12 @@ import org.apache.lucene.analysis.Tokenizer; import java.io.Reader; +import java.util.function.Supplier; public final class CustomAnalyzer extends Analyzer { private final String tokenizerName; - private final TokenizerFactory tokenizerFactory; + private final Supplier tokenizerFactory; private final CharFilterFactory[] charFilters; @@ -37,13 +38,13 @@ public final class CustomAnalyzer extends Analyzer { private final int positionIncrementGap; private final int offsetGap; - public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, + public CustomAnalyzer(String tokenizerName, Supplier tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters) { this(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1); } - public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, - TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) { + public CustomAnalyzer(String tokenizerName, Supplier tokenizerFactory, CharFilterFactory[] charFilters, + TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) { this.tokenizerName = tokenizerName; this.tokenizerFactory = tokenizerFactory; this.charFilters = charFilters; @@ -59,7 +60,7 @@ public String getTokenizerName() { return tokenizerName; } - public TokenizerFactory tokenizerFactory() { + public Supplier tokenizerFactory() { return tokenizerFactory; } @@ -86,7 +87,7 @@ public int getOffsetGap(String field) { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = tokenizerFactory.create(); + Tokenizer tokenizer = tokenizerFactory.get(); TokenStream tokenStream = tokenizer; for (TokenFilterFactory tokenFilter : tokenFilters) { tokenStream = tokenFilter.create(tokenStream); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java index 4ba078051640a..96d1745e336ff 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import 
org.elasticsearch.index.IndexSettings; @@ -27,6 +28,7 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.function.Supplier; /** * A custom analyzer that is built out of a single {@link org.apache.lucene.analysis.Tokenizer} and a list @@ -46,14 +48,14 @@ public CustomAnalyzerProvider(IndexSettings indexSettings, this.environment = environment; } - public void build(final Map tokenizers, final Map charFilters, + public void build(final Map> tokenizers, final Map charFilters, final Map tokenFilters) { String tokenizerName = analyzerSettings.get("tokenizer"); if (tokenizerName == null) { throw new IllegalArgumentException("Custom Analyzer [" + name() + "] must be configured with a tokenizer"); } - TokenizerFactory tokenizer = tokenizers.get(tokenizerName); + Supplier tokenizer = tokenizers.get(tokenizerName); if (tokenizer == null) { throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find tokenizer under name [" + tokenizerName + "]"); } @@ -95,7 +97,7 @@ public void build(final Map tokenizers, final Map tokenizer, List tokenFilterList, List charFiltersList, Environment env) { if (tokenFilter instanceof SynonymGraphTokenFilterFactory) { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java index 13946be3a8d22..4a24bad505f17 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java @@ -19,12 +19,14 @@ package org.elasticsearch.index.analysis; +import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.function.Supplier; /** * A custom normalizer that is built out of a char and token filters. On the @@ -43,8 +45,8 @@ public CustomNormalizerProvider(IndexSettings indexSettings, this.analyzerSettings = settings; } - public void build(final String tokenizerName, final TokenizerFactory tokenizerFactory, final Map charFilters, - final Map tokenFilters) { + public void build(final String tokenizerName, final Supplier tokenizerFactory, + final Map charFilters, final Map tokenFilters) { if (analyzerSettings.get("tokenizer") != null) { throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer"); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java index 131246d0b766a..f3e55a4d0c2b6 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java @@ -31,10 +31,10 @@ /** * Provides pre-configured, shared {@link Tokenizer}s. */ -public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent { +public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent> { /** * Create a pre-configured tokenizer that may not vary at all. 
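With the CustomAnalyzer changes above, the analyzer no longer cares where its tokenizer comes from: any Supplier<Tokenizer>, including a plain constructor reference, fits the patched constructor. A sketch of constructing one under the patched signature (the four-argument constructor is the one shown in the diff; the argument values are illustrative):

    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.elasticsearch.index.analysis.CharFilterFactory;
    import org.elasticsearch.index.analysis.CustomAnalyzer;
    import org.elasticsearch.index.analysis.TokenFilterFactory;

    class CustomAnalyzerSketch {
        static CustomAnalyzer whitespaceOnly() {
            // The second argument is any Supplier<Tokenizer>; before the patch it
            // had to be a TokenizerFactory implementation.
            return new CustomAnalyzer("whitespace", WhitespaceTokenizer::new,
                    new CharFilterFactory[0], new TokenFilterFactory[0]);
        }
    }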
- * + * * @param name the name of the tokenizer in the api * @param create builds the tokenizer * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the @@ -48,7 +48,7 @@ public static PreConfiguredTokenizer singleton(String name, Supplier /** * Create a pre-configured tokenizer that may vary based on the Lucene version. - * + * * @param name the name of the tokenizer in the api * @param create builds the tokenizer * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the @@ -62,7 +62,7 @@ public static PreConfiguredTokenizer luceneVersion(String name, Function create; private final Function multiTermComponent; - + private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function create, @Nullable Function multiTermComponent) { super(name, cache); @@ -90,14 +90,14 @@ public boolean hasMultiTermComponent() { return multiTermComponent != null; } - private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {} + private interface MultiTermAwareTokenizerFactory extends Supplier, MultiTermAwareComponent {} @Override - protected TokenizerFactory create(Version version) { + protected Supplier create(Version version) { if (multiTermComponent != null) { return new MultiTermAwareTokenizerFactory() { @Override - public Tokenizer create() { + public Tokenizer get() { return create.apply(version); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java index ed8d2b452c2d5..1df5b212994eb 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java @@ -31,12 +31,12 @@ public class StandardTokenizerFactory extends AbstractTokenizerFactory { private final int maxTokenLength; public StandardTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); + super(indexSettings, settings); maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); } @Override - public Tokenizer create() { + public Tokenizer get() { StandardTokenizer tokenizer = new StandardTokenizer(); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java deleted file mode 100644 index be96dbd65602b..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java +++ /dev/null @@ -1,26 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. 
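The private MultiTermAwareTokenizerFactory interface above exists so a single anonymous object can be both the tokenizer supplier and the multi-term marker that PreConfiguredTokenizer.create(Version) hands back. The trick in isolation, with String standing in for Tokenizer and a pared-down MultiTermAwareComponent:

    import java.util.function.Supplier;

    public class MixinSketch {
        interface MultiTermAwareComponent {
            Object getMultiTermComponent();
        }

        // One object, two roles: the same shape as PreConfiguredTokenizer's private interface.
        interface MultiTermAwareSupplier extends Supplier<String>, MultiTermAwareComponent {}

        static Supplier<String> build(boolean multiTermAware) {
            if (multiTermAware) {
                return new MultiTermAwareSupplier() {
                    @Override public String get() { return "tokenizer"; }
                    @Override public Object getMultiTermComponent() { return "multi-term tokenizer"; }
                };
            }
            return () -> "tokenizer";  // plain supplier when no multi-term variant exists
        }
    }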
See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.apache.lucene.analysis.Tokenizer; - -public interface TokenizerFactory { // TODO replace with Supplier - Tokenizer create(); -} diff --git a/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 1ecdc797073cf..31591bc77dfb8 100644 --- a/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -20,6 +20,7 @@ package org.elasticsearch.indices.analysis; import org.apache.lucene.analysis.LowerCaseFilter; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -44,7 +45,6 @@ import org.elasticsearch.index.analysis.StopAnalyzerProvider; import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider; import org.elasticsearch.plugins.AnalysisPlugin; @@ -52,6 +52,7 @@ import java.util.List; import java.util.Locale; import java.util.Map; +import java.util.function.Supplier; import static java.util.Collections.unmodifiableMap; import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings; @@ -77,7 +78,7 @@ public AnalysisModule(Environment environment, List plugins) thr NamedRegistry hunspellDictionaries = setupHunspellDictionaries(plugins); hunspellService = new HunspellService(environment.settings(), environment, hunspellDictionaries.getRegistry()); NamedRegistry> tokenFilters = setupTokenFilters(plugins, hunspellService); - NamedRegistry> tokenizers = setupTokenizers(plugins); + NamedRegistry>> tokenizers = setupTokenizers(plugins); NamedRegistry>> analyzers = setupAnalyzers(plugins); NamedRegistry>> normalizers = setupNormalizers(plugins); @@ -194,8 +195,8 @@ static Map setupPreConfiguredTokenizers(List> setupTokenizers(List plugins) { - NamedRegistry> tokenizers = new NamedRegistry<>("tokenizer"); + private NamedRegistry>> setupTokenizers(List plugins) { + NamedRegistry>> tokenizers = new NamedRegistry<>("tokenizer"); tokenizers.register("standard", StandardTokenizerFactory::new); tokenizers.extractAndRegister(plugins, AnalysisPlugin::getTokenizers); return tokenizers; diff --git a/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java index c85981f8dcb91..5ed2b82966096 100644 --- a/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java @@ -33,12 +33,12 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import java.io.IOException; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; @@ -81,7 +81,7 @@ default Map> 
getTokenFilters() { * Override to add additional {@link Tokenizer}s. See {@link #requiresAnalysisSettings(AnalysisProvider)} * how to on get the configuration from the index. */ - default Map> getTokenizers() { + default Map>> getTokenizers() { return emptyMap(); } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index c0404a47ab237..2c71cb5fb8c8c 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; @@ -38,7 +39,6 @@ import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.PreConfiguredCharFilter; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.indices.analysis.AnalysisModuleTests.AppendCharFilter; @@ -50,6 +50,7 @@ import java.io.Reader; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -110,7 +111,7 @@ public Map> getCharFilters() { } @Override - public Map> getTokenizers() { + public Map>> getTokenizers() { return singletonMap("keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); } diff --git a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java index 1dcb2d4e39fd6..02d06d7fc289c 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java @@ -21,6 +21,7 @@ import org.apache.lucene.analysis.MockLowerCaseFilter; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -34,6 +35,7 @@ import java.util.List; import java.util.Map; import java.util.function.Function; +import java.util.function.Supplier; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -172,7 +174,7 @@ public Object getMultiTermComponent() { } @Override - public Map> getTokenizers() { + public Map>> getTokenizers() { return singletonMap("keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 56e587dc995da..5f9e54c8990bc 100644 --- 
a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -35,7 +35,6 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugins.AnalysisPlugin; @@ -49,6 +48,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; +import java.util.function.Supplier; import static java.util.Collections.singletonList; import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; @@ -68,17 +68,10 @@ public List getPreConfiguredTokenFilters() { } @Override - public Map> getTokenizers() { - return singletonMap("keyword", (indexSettings, environment, name, settings) -> { - class Factory implements TokenizerFactory { - - @Override - public Tokenizer create() { - return new MockTokenizer(MockTokenizer.KEYWORD, false); - } - } - return new Factory(); - }); + public Map>> getTokenizers() { + return singletonMap( + "keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false) + ); } }; diff --git a/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 47f30e10ef912..0b528b0189e94 100644 --- a/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -51,7 +51,6 @@ import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.MyFilterTokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; @@ -72,6 +71,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.function.Supplier; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -263,7 +263,7 @@ public List getPreConfiguredCharFilters() { } @Override - public Map> getTokenizers() { + public Map>> getTokenizers() { // Need mock keyword tokenizer here, because alpha / beta versions are broken up by the dash. 
return singletonMap("keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index 9cdfc6776f883..d90bc62449942 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -37,6 +37,7 @@ import org.apache.logging.log4j.status.StatusConsoleListener; import org.apache.logging.log4j.status.StatusData; import org.apache.logging.log4j.status.StatusLogger; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestRuleMarkFailure; @@ -87,7 +88,6 @@ import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.indices.IndicesModule; @@ -1349,12 +1349,12 @@ public static final class TestAnalysis { public final IndexAnalyzers indexAnalyzers; public final Map tokenFilter; - public final Map tokenizer; + public final Map> tokenizer; public final Map charFilter; public TestAnalysis(IndexAnalyzers indexAnalyzers, Map tokenFilter, - Map tokenizer, + Map> tokenizer, Map charFilter) { this.indexAnalyzers = indexAnalyzers; this.tokenFilter = tokenFilter; diff --git a/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java b/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java index fb9da1dad40fd..3754e580e9349 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java +++ b/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java @@ -20,12 +20,12 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import java.util.Map; +import java.util.function.Supplier; import static java.util.Collections.singletonMap; @@ -39,16 +39,8 @@ public class MockKeywordPlugin extends Plugin implements AnalysisPlugin { @Override - public Map> getTokenizers() { - return singletonMap("keyword", (indexSettings, environment, name, settings) -> { - class Factory implements TokenizerFactory { - - @Override - public Tokenizer create() { - return new MockTokenizer(MockTokenizer.KEYWORD, false); - } - } - return new Factory(); - }); + public Map>> getTokenizers() { + return singletonMap("keyword", (indexSettings, environment, name, settings) + -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java index 1c2808c70ffcf..58ce9ca7e5ea4 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java +++ 
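The KeywordFieldMapperTests and MockKeywordPlugin hunks show the payoff of the Supplier change in test code: the anonymous Factory class collapses into a curried lambda, where the outer function plays the AnalysisProvider and the inner no-argument function is the Supplier<Tokenizer>. The same currying reduced to plain java.util.function types:

    import java.util.function.Function;
    import java.util.function.Supplier;

    public class CurriedProviderSketch {
        public static void main(String[] args) {
            // Stand-in for AnalysisProvider<Supplier<Tokenizer>>: configuration in, factory out.
            Function<String, Supplier<String>> provider =
                    settings -> () -> "keyword tokenizer for " + settings;

            Supplier<String> factory = provider.apply("index-a"); // resolved once per index
            System.out.println(factory.get());                    // invoked once per token stream
        }
    }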
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java @@ -6,6 +6,7 @@ package org.elasticsearch.xpack.core.ml.job.config; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -29,7 +30,6 @@ import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.CustomAnalyzerProvider; import org.elasticsearch.index.analysis.TokenFilterFactory; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction; import org.elasticsearch.xpack.core.ml.MlParserType; @@ -41,6 +41,7 @@ import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.function.Supplier; /** @@ -368,7 +369,7 @@ public Tuple toAnalyzer(AnalysisRegistry analysisRegistry, En List charFilterFactoryList = parseCharFilterFactories(analysisRegistry, environment); - Tuple tokenizerFactory = parseTokenizerFactory(analysisRegistry, + Tuple> tokenizerFactory = parseTokenizerFactory(analysisRegistry, environment); List tokenFilterFactoryList = parseTokenFilterFactories(analysisRegistry, @@ -424,13 +425,13 @@ private List parseCharFilterFactories(AnalysisRegistry analys * Get the tokenizer factory for the configured tokenizer. The configuration * can be the name of an out-of-the-box tokenizer, or a custom definition. */ - private Tuple parseTokenizerFactory(AnalysisRegistry analysisRegistry, + private Tuple> parseTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment) throws IOException { final String name; - final TokenizerFactory tokenizerFactory; + final Supplier tokenizerFactory; if (tokenizer.name != null) { name = tokenizer.name; - AnalysisModule.AnalysisProvider tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); + AnalysisModule.AnalysisProvider> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("Failed to find global tokenizer under [" + name + "]"); } @@ -440,7 +441,7 @@ private Tuple parseTokenizerFactory(AnalysisRegistry a if (tokenizerTypeName == null) { throw new IllegalArgumentException("Missing [type] setting for tokenizer: " + tokenizer.definition); } - AnalysisModule.AnalysisProvider tokenizerFactoryFactory = + AnalysisModule.AnalysisProvider> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerTypeName); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("Failed to find global tokenizer under [" + tokenizerTypeName + "]"); @@ -458,7 +459,7 @@ private Tuple parseTokenizerFactory(AnalysisRegistry a * element can be the name of an out-of-the-box token filter, or a custom definition. 
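CategorizationAnalyzerConfig.parseTokenizerFactory above repeats the same two-step lookup as TransportAnalyzeAction: resolve the provider by name, fail loudly if it is absent, then build the factory from the settings. The shared pattern, sketched with stand-in types (AnalysisProvider is reduced to one method here; the real one also takes IndexSettings and Environment):

    import java.util.Map;
    import java.util.function.Supplier;

    public class ProviderLookupSketch {
        // Pared-down stand-in for AnalysisModule.AnalysisProvider<T>.
        interface AnalysisProvider<T> {
            T get(String name, String settings);
        }

        static Supplier<String> resolve(Map<String, AnalysisProvider<Supplier<String>>> registry,
                                        String name, String settings) {
            AnalysisProvider<Supplier<String>> providerFactory = registry.get(name);
            if (providerFactory == null) {
                throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]");
            }
            return providerFactory.get(name, settings); // the factory itself is built lazily, per index
        }
    }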
*/ private List parseTokenFilterFactories(AnalysisRegistry analysisRegistry, Environment environment, - Tuple tokenizerFactory, + Tuple> tokenizerFactory, List charFilterFactoryList) throws IOException { final List tokenFilterFactoryList = new ArrayList<>(); for (NameOrDefinition tokenFilter : tokenFilters) { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java index 796cae375e3a6..f2092ead39226 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java @@ -5,6 +5,7 @@ */ package org.elasticsearch.xpack.core; +import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.support.ActionFilter; @@ -33,7 +34,6 @@ import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.http.HttpServerTransport; import org.elasticsearch.index.IndexModule; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.ingest.Processor; @@ -337,8 +337,8 @@ public UnaryOperator> getIndexTemplateMetaDat } @Override - public Map> getTokenizers() { - Map> tokenizers = new HashMap<>(); + public Map>> getTokenizers() { + Map>> tokenizers = new HashMap<>(); filterPlugins(AnalysisPlugin.class).stream().forEach(p -> tokenizers.putAll(p.getTokenizers())); return tokenizers; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index 3d1011c47e2a8..b3d914b0cf3f8 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -6,6 +6,7 @@ package org.elasticsearch.xpack.ml; import org.apache.logging.log4j.Logger; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.SetOnce; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; @@ -35,7 +36,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.monitor.os.OsProbe; @@ -581,7 +581,7 @@ public List> getExecutorBuilders(Settings settings) { } @Override - public Map> getTokenizers() { + public Map>> getTokenizers() { return Collections.singletonMap(MlClassicTokenizer.NAME, MlClassicTokenizerFactory::new); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java index 40fee1f40f138..24ee6634d4f6b 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java @@ -20,11 +20,11 @@ public 
class MlClassicTokenizerFactory extends AbstractTokenizerFactory { public MlClassicTokenizerFactory(IndexSettings indexSettings, Environment environment, String name, Settings settings) { - super(indexSettings, name, settings); + super(indexSettings, settings); } @Override - public Tokenizer create() { + public Tokenizer get() { return new MlClassicTokenizer(); } } From 65b8f823b8cf7af95eb2860123bdbbbef69ad65d Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 16 Jul 2018 16:04:50 +0200 Subject: [PATCH 2/3] Revert converting TokenizerFactory to Supplier --- .../common/CharGroupTokenizerFactory.java | 2 +- .../common/ClassicTokenizerFactory.java | 2 +- .../analysis/common/CommonAnalysisPlugin.java | 7 ++-- .../common/EdgeNGramTokenizerFactory.java | 2 +- .../common/KeywordTokenizerFactory.java | 2 +- .../common/LetterTokenizerFactory.java | 2 +- .../common/LowerCaseTokenizerFactory.java | 2 +- .../common/NGramTokenizerFactory.java | 2 +- .../common/PathHierarchyTokenizerFactory.java | 2 +- .../common/PatternTokenizerFactory.java | 2 +- .../SimplePatternSplitTokenizerFactory.java | 2 +- .../common/SimplePatternTokenizerFactory.java | 2 +- .../analysis/common/ThaiTokenizerFactory.java | 2 +- .../common/UAX29URLEmailTokenizerFactory.java | 2 +- .../common/WhitespaceTokenizerFactory.java | 2 +- .../CharGroupTokenizerFactoryTests.java | 6 ++-- .../common/NGramTokenizerFactoryTests.java | 16 +++++----- .../PathHierarchyTokenizerFactoryTests.java | 14 ++++---- .../WhitespaceTokenizerFactoryTests.java | 10 +++--- .../index/analysis/IcuTokenizerFactory.java | 2 +- .../analysis/icu/AnalysisICUPlugin.java | 5 ++- .../analysis/IcuTokenizerFactoryTests.java | 14 ++++---- .../analysis/SimpleIcuAnalysisTests.java | 4 +-- .../analysis/KuromojiTokenizerFactory.java | 2 +- .../kuromoji/AnalysisKuromojiPlugin.java | 5 ++- .../index/analysis/KuromojiAnalysisTests.java | 21 ++++++------ .../index/analysis/NoriTokenizerFactory.java | 2 +- .../analysis/nori/AnalysisNoriPlugin.java | 5 ++- .../index/analysis/NoriAnalysisTests.java | 5 ++- ...SmartChineseTokenizerTokenizerFactory.java | 2 +- .../smartcn/AnalysisSmartChinesePlugin.java | 7 ++-- .../SimpleSmartChineseAnalysisTests.java | 4 +-- .../analyze/TransportAnalyzeAction.java | 32 +++++++++---------- .../analysis/AbstractTokenizerFactory.java | 5 +-- .../index/analysis/AnalysisRegistry.java | 32 +++++++++---------- .../index/analysis/CustomAnalyzer.java | 13 ++++---- .../analysis/CustomAnalyzerProvider.java | 8 ++--- .../analysis/CustomNormalizerProvider.java | 6 ++-- .../analysis/PreConfiguredTokenizer.java | 8 ++--- .../analysis/StandardTokenizerFactory.java | 2 +- .../index/analysis/TokenizerFactory.java | 26 +++++++++++++++ .../indices/analysis/AnalysisModule.java | 9 +++--- .../elasticsearch/plugins/AnalysisPlugin.java | 4 +-- .../indices/TransportAnalyzeActionTests.java | 5 ++- .../index/analysis/CustomNormalizerTests.java | 4 +-- .../index/mapper/KeywordFieldMapperTests.java | 17 +++++++--- .../indices/analysis/AnalysisModuleTests.java | 4 +-- .../org/elasticsearch/test/ESTestCase.java | 6 ++-- .../elasticsearch/test/MockKeywordPlugin.java | 16 +++++++--- .../config/CategorizationAnalyzerConfig.java | 15 ++++----- .../core/LocalStateCompositeXPackPlugin.java | 6 ++-- .../xpack/ml/MachineLearning.java | 4 +-- .../MlClassicTokenizerFactory.java | 2 +- 53 files changed, 198 insertions(+), 185 deletions(-) create mode 100644 server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java diff --git 
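Patch 2 walks the conversion back: the diffstat above recreates TokenizerFactory.java, and the hunks that follow restore create() in every factory while keeping the constructor cleanup from patch 1 (the commit message does not record why the rename was dropped). Keeping the named interface costs nothing in interoperability, since a method reference adapts it to a Supplier wherever one is wanted; a sketch with stand-in types:

    import java.util.function.Supplier;

    public class AdapterSketch {
        interface Tokenizer {}                              // stand-in for the Lucene class
        interface TokenizerFactory { Tokenizer create(); }  // the restored interface, reduced

        // Where a Supplier<Tokenizer> is genuinely wanted, the named interface adapts freely:
        static Supplier<Tokenizer> asSupplier(TokenizerFactory factory) {
            return factory::create;
        }
    }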
a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java index 3614574d8927a..4920b7daae852 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactory.java @@ -109,7 +109,7 @@ private char parseEscapedChar(final String s) { } @Override - public Tokenizer get() { + public Tokenizer create() { return new CharTokenizer() { @Override protected boolean isTokenChar(int c) { diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java index 6e396d6ad2efd..27316f4cde5e7 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ClassicTokenizerFactory.java @@ -40,7 +40,7 @@ public class ClassicTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { ClassicTokenizer tokenizer = new ClassicTokenizer(); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 60cda6ce5f09b..d95af920a307b 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -24,7 +24,6 @@ import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; import org.apache.lucene.analysis.ar.ArabicStemFilter; @@ -123,6 +122,7 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; import org.elasticsearch.plugins.AnalysisPlugin; @@ -134,7 +134,6 @@ import java.util.List; import java.util.Map; import java.util.TreeMap; -import java.util.function.Supplier; import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings; @@ -263,8 +262,8 @@ public Map> getCharFilters() { } @Override - public Map>> getTokenizers() { - Map>> tokenizers = new TreeMap<>(); + public Map> getTokenizers() { + Map> tokenizers = new TreeMap<>(); tokenizers.put("simple_pattern", SimplePatternTokenizerFactory::new); tokenizers.put("simple_pattern_split", SimplePatternSplitTokenizerFactory::new); tokenizers.put("thai", ThaiTokenizerFactory::new); diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java 
b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java index d58316055fba3..9bb17abf0cd02 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/EdgeNGramTokenizerFactory.java @@ -43,7 +43,7 @@ public class EdgeNGramTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { if (matcher == null) { return new EdgeNGramTokenizer(minGram, maxGram); } else { diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java index f174ea4b48d0d..e4bf2c8c4ad4e 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/KeywordTokenizerFactory.java @@ -36,7 +36,7 @@ public class KeywordTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { return new KeywordTokenizer(bufferSize); } } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java index 6c92c13d2b126..cba30cb63c36b 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LetterTokenizerFactory.java @@ -33,7 +33,7 @@ public class LetterTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { return new LetterTokenizer(); } } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java index 6b3eb513d0f90..8c913a33cfe4c 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/LowerCaseTokenizerFactory.java @@ -34,7 +34,7 @@ public class LowerCaseTokenizerFactory extends AbstractTokenizerFactory implemen } @Override - public Tokenizer get() { + public Tokenizer create() { return new LowerCaseTokenizer(); } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java index 903e854876449..b00797428b79a 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/NGramTokenizerFactory.java @@ -105,7 +105,7 @@ static CharMatcher parseTokenChars(List characterClasses) { } @Override - public Tokenizer get() { + public Tokenizer create() { if (matcher == null) { return new NGramTokenizer(minGram, maxGram); } else { diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java index 
e89ecddbd1187..5b966c1c3b8df 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactory.java @@ -61,7 +61,7 @@ public class PathHierarchyTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { if (reverse) { return new ReversePathHierarchyTokenizer(bufferSize, delimiter, replacement, skip); } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java index ba4a27d29d09e..11ba7e44db0e4 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/PatternTokenizerFactory.java @@ -47,7 +47,7 @@ public class PatternTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { return new PatternTokenizer(pattern, group); } } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java index 8846a217d1232..0faf407829577 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternSplitTokenizerFactory.java @@ -37,7 +37,7 @@ public SimplePatternSplitTokenizerFactory(IndexSettings indexSettings, Environme } @Override - public Tokenizer get() { + public Tokenizer create() { return new SimplePatternSplitTokenizer(pattern); } } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java index ea25d6e404003..67aee333d0ffd 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/SimplePatternTokenizerFactory.java @@ -37,7 +37,7 @@ public SimplePatternTokenizerFactory(IndexSettings indexSettings, Environment en } @Override - public Tokenizer get() { + public Tokenizer create() { return new SimplePatternTokenizer(pattern); } } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java index cf551f3422eb8..861ade079a08e 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/ThaiTokenizerFactory.java @@ -36,7 +36,7 @@ public class ThaiTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { return new ThaiTokenizer(); } } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java index 
075d6f88ab838..cd02eec24b42c 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/UAX29URLEmailTokenizerFactory.java @@ -37,7 +37,7 @@ public class UAX29URLEmailTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { UAX29URLEmailTokenizer tokenizer = new UAX29URLEmailTokenizer(); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java index 548cd2e9f810c..7ce6a361cbad2 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactory.java @@ -39,7 +39,7 @@ public class WhitespaceTokenizerFactory extends AbstractTokenizerFactory { } @Override - public Tokenizer get() { + public Tokenizer create() { return new WhitespaceTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, maxTokenLength); } } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java index f2ccff468c5e3..1447531aa8731 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CharGroupTokenizerFactoryTests.java @@ -43,7 +43,7 @@ public void testParseTokenChars() { new String[] { "commas" }, new String[] { "a", "b", "c", "\\$" })) { final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", conf).build(); - expectThrows(RuntimeException.class, () -> new CharGroupTokenizerFactory(indexProperties, null, name, settings).get()); + expectThrows(RuntimeException.class, () -> new CharGroupTokenizerFactory(indexProperties, null, name, settings).create()); } for (String[] conf : Arrays.asList( @@ -56,7 +56,7 @@ public void testParseTokenChars() { new String[] { "\\r" }, new String[] { "f", "o", "o", "symbol" })) { final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", Arrays.asList(conf)).build(); - new CharGroupTokenizerFactory(indexProperties, null, name, settings).get(); + new CharGroupTokenizerFactory(indexProperties, null, name, settings).create(); // no exception } } @@ -67,7 +67,7 @@ public void testTokenization() throws IOException { final Settings indexSettings = newAnalysisSettingsBuilder().build(); final Settings settings = newAnalysisSettingsBuilder().putList("tokenize_on_chars", "whitespace", ":", "\\u0024").build(); Tokenizer tokenizer = new CharGroupTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), - null, name, settings).get(); + null, name, settings).create(); tokenizer.setReader(new StringReader("foo bar $34 test:test2")); assertTokenStreamContents(tokenizer, new String[] {"foo", "bar", "34", "test", "test2"}); } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java index 
68da0aae933db..1cf6ef4696d04 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/NGramTokenizerFactoryTests.java @@ -51,7 +51,7 @@ public void testParseTokenChars() { final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3) .put("token_chars", tokenChars).build(); try { - new NGramTokenizerFactory(indexProperties, null, name, settings).get(); + new NGramTokenizerFactory(indexProperties, null, name, settings).create(); fail(); } catch (IllegalArgumentException expected) { // OK @@ -62,7 +62,7 @@ public void testParseTokenChars() { .put("token_chars", tokenChars).build(); indexProperties = IndexSettingsModule.newIndexSettings(index, indexSettings); - new NGramTokenizerFactory(indexProperties, null, name, settings).get(); + new NGramTokenizerFactory(indexProperties, null, name, settings).create(); // no exception } } @@ -75,7 +75,7 @@ public void testNoTokenChars() throws IOException { final Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 4) .putList("token_chars", new String[0]).build(); Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings) - .get(); + .create(); tokenizer.setReader(new StringReader("1.34")); assertTokenStreamContents(tokenizer, new String[] {"1.", "1.3", "1.34", ".3", ".34", "34"}); } @@ -88,13 +88,13 @@ public void testPreTokenization() throws IOException { Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3) .put("token_chars", "letter,digit").build(); Tokenizer tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings) - .get(); + .create(); tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f ")); assertTokenStreamContents(tokenizer, new String[] {"Åb", "Åbc", "bc", "dé", "déf", "éf", "g\uD801\uDC00", "g\uD801\uDC00f", "\uD801\uDC00f"}); settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3) .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build(); - tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).get(); + tokenizer = new NGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(); tokenizer.setReader(new StringReader(" a!$ 9")); assertTokenStreamContents(tokenizer, new String[] {" a", " a!", "a!", "a!$", "!$", "!$ ", "$ ", "$ 9", " 9"}); @@ -107,14 +107,14 @@ public void testPreTokenizationEdge() throws IOException { final Settings indexSettings = newAnalysisSettingsBuilder().build(); Settings settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3).put("token_chars", "letter,digit").build(); Tokenizer tokenizer = - new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).get(); + new EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings).create(); tokenizer.setReader(new StringReader("Åbc déf g\uD801\uDC00f ")); assertTokenStreamContents(tokenizer, new String[] {"Åb", "Åbc", "dé", "déf", "g\uD801\uDC00", "g\uD801\uDC00f"}); settings = newAnalysisSettingsBuilder().put("min_gram", 2).put("max_gram", 3) .put("token_chars", "letter,digit,punctuation,whitespace,symbol").build(); tokenizer = new 
EdgeNGramTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, name, settings) - .get(); + .create(); tokenizer.setReader(new StringReader(" a!$ 9")); assertTokenStreamContents(tokenizer, new String[] {" a", " a!"}); @@ -163,7 +163,7 @@ public void testMaxNGramDiffException() throws Exception{ final Settings settings = newAnalysisSettingsBuilder().put("min_gram", min_gram).put("max_gram", max_gram).build(); IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> - new NGramTokenizerFactory(indexProperties, null, name, settings).get()); + new NGramTokenizerFactory(indexProperties, null, name, settings).create()); assertEquals( "The difference between max_gram and min_gram in NGram Tokenizer must be less than or equal to: [" + maxAllowedNgramDiff + "] but was [" + ngramDiff + "]. This limit can be set by changing the [" diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java index 91e3990a4f317..0b545d3355201 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/PathHierarchyTokenizerFactoryTests.java @@ -36,7 +36,7 @@ public void testDefaults() throws IOException { final Index index = new Index("test", "_na_"); final Settings indexSettings = newAnalysisSettingsBuilder().build(); Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", Settings.EMPTY).get(); + "path-hierarchy-tokenizer", Settings.EMPTY).create(); tokenizer.setReader(new StringReader("/one/two/three")); assertTokenStreamContents(tokenizer, new String[] {"/one", "/one/two", "/one/two/three"}); } @@ -46,7 +46,7 @@ public void testReverse() throws IOException { final Settings indexSettings = newAnalysisSettingsBuilder().build(); Settings settings = newAnalysisSettingsBuilder().put("reverse", true).build(); Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", settings).get(); + "path-hierarchy-tokenizer", settings).create(); tokenizer.setReader(new StringReader("/one/two/three")); assertTokenStreamContents(tokenizer, new String[] {"/one/two/three", "one/two/three", "two/three", "three"}); } @@ -56,7 +56,7 @@ public void testDelimiter() throws IOException { final Settings indexSettings = newAnalysisSettingsBuilder().build(); Settings settings = newAnalysisSettingsBuilder().put("delimiter", "-").build(); Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", settings).get(); + "path-hierarchy-tokenizer", settings).create(); tokenizer.setReader(new StringReader("/one/two/three")); assertTokenStreamContents(tokenizer, new String[] {"/one/two/three"}); tokenizer.setReader(new StringReader("one-two-three")); @@ -68,7 +68,7 @@ public void testReplace() throws IOException { final Settings indexSettings = newAnalysisSettingsBuilder().build(); Settings settings = newAnalysisSettingsBuilder().put("replacement", "-").build(); Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", 
settings).get(); + "path-hierarchy-tokenizer", settings).create(); tokenizer.setReader(new StringReader("/one/two/three")); assertTokenStreamContents(tokenizer, new String[] {"-one", "-one-two", "-one-two-three"}); tokenizer.setReader(new StringReader("one-two-three")); @@ -80,7 +80,7 @@ public void testSkip() throws IOException { final Settings indexSettings = newAnalysisSettingsBuilder().build(); Settings settings = newAnalysisSettingsBuilder().put("skip", 2).build(); Tokenizer tokenizer = new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", settings).get(); + "path-hierarchy-tokenizer", settings).create(); tokenizer.setReader(new StringReader("/one/two/three/four/five")); assertTokenStreamContents(tokenizer, new String[] {"/three", "/three/four", "/three/four/five"}); } @@ -93,7 +93,7 @@ public void testDelimiterExceptions() { Settings settings = newAnalysisSettingsBuilder().put("delimiter", delimiter).build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", settings).get()); + "path-hierarchy-tokenizer", settings).create()); assertEquals("delimiter must be a one char value", e.getMessage()); } { @@ -101,7 +101,7 @@ public void testDelimiterExceptions() { Settings settings = newAnalysisSettingsBuilder().put("replacement", replacement).build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new PathHierarchyTokenizerFactory(IndexSettingsModule.newIndexSettings(index, indexSettings), null, - "path-hierarchy-tokenizer", settings).get()); + "path-hierarchy-tokenizer", settings).create()); assertEquals("replacement must be a one char value", e.getMessage()); } } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java index 6852687009053..f34b694fbf60f 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/WhitespaceTokenizerFactoryTests.java @@ -42,7 +42,7 @@ public void testSimpleWhiteSpaceTokenizer() throws IOException { final Settings indexSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings); WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", - Settings.EMPTY).get(); + Settings.EMPTY).create(); try (Reader reader = new StringReader("one, two, three")) { tokenizer.setReader(reader); @@ -55,7 +55,7 @@ public void testMaxTokenLength() throws IOException { IndexSettings indexProperties = IndexSettingsModule.newIndexSettings(new Index("test", "_na_"), indexSettings); final Settings settings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 2).build(); WhitespaceTokenizer tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", - settings).get(); + settings).create(); try (Reader reader = new StringReader("one, two, three")) { tokenizer.setReader(reader); assertTokenStreamContents(tokenizer, new String[] { 
"on", "e,", "tw", "o,", "th", "re", "e" }); @@ -63,7 +63,7 @@ public void testMaxTokenLength() throws IOException { final Settings defaultSettings = Settings.EMPTY; tokenizer = (WhitespaceTokenizer) new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", defaultSettings) - .get(); + .create(); String veryLongToken = RandomStrings.randomAsciiAlphanumOfLength(random(), 256); try (Reader reader = new StringReader(veryLongToken)) { tokenizer.setReader(reader); @@ -72,12 +72,12 @@ public void testMaxTokenLength() throws IOException { final Settings tooLongSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, 1024 * 1024 + 1).build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, - () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).get()); + () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", tooLongSettings).create()); assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: 1048577", e.getMessage()); final Settings negativeSettings = Settings.builder().put(WhitespaceTokenizerFactory.MAX_TOKEN_LENGTH, -1).build(); e = expectThrows(IllegalArgumentException.class, - () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).get()); + () -> new WhitespaceTokenizerFactory(indexProperties, null, "whitespace_maxlen", negativeSettings).create()); assertEquals("maxTokenLen must be greater than 0 and less than 1048576 passed: -1", e.getMessage()); } } diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java index 213ecc223dbfe..3f8b9296aa02c 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/index/analysis/IcuTokenizerFactory.java @@ -52,7 +52,7 @@ public IcuTokenizerFactory(IndexSettings indexSettings, Environment environment, } @Override - public Tokenizer get() { + public Tokenizer create() { if (config == null) { return new ICUTokenizer(); }else{ diff --git a/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java b/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java index 7322199919c43..58ebdc8e2a801 100644 --- a/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java +++ b/plugins/analysis-icu/src/main/java/org/elasticsearch/plugin/analysis/icu/AnalysisICUPlugin.java @@ -21,7 +21,6 @@ import static java.util.Collections.singletonMap; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.IcuCollationTokenFilterFactory; @@ -31,6 +30,7 @@ import org.elasticsearch.index.analysis.IcuTokenizerFactory; import org.elasticsearch.index.analysis.IcuTransformTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.ICUCollationKeywordFieldMapper; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; @@ -43,7 +43,6 @@ import java.util.HashMap; import java.util.List; import java.util.Map; 
-import java.util.function.Supplier; public class AnalysisICUPlugin extends Plugin implements AnalysisPlugin, MapperPlugin { @Override @@ -62,7 +61,7 @@ public Map> getTokenFilters() { } @Override - public Map>> getTokenizers() { + public Map> getTokenizers() { return singletonMap("icu_tokenizer", IcuTokenizerFactory::new); } diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java index 9d615b15ae7ca..8cce4c13542c6 100644 --- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java +++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/IcuTokenizerFactoryTests.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -35,7 +34,6 @@ import java.io.StringReader; import java.nio.file.Files; import java.nio.file.Path; -import java.util.function.Supplier; import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; @@ -44,8 +42,8 @@ public class IcuTokenizerFactoryTests extends ESTestCase { public void testSimpleIcuTokenizer() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("icu_tokenizer"); - ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.get(); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("icu_tokenizer"); + ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create(); Reader reader = new StringReader("向日葵, one-two"); tokenizer.setReader(reader); @@ -56,8 +54,8 @@ public void testIcuCustomizeRuleFile() throws IOException { TestAnalysis analysis = createTestAnalysis(); // test the tokenizer with single rule file - Supplier tokenizerFactory = analysis.tokenizer.get("user_rule_tokenizer"); - ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.get(); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("user_rule_tokenizer"); + ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create(); Reader reader = new StringReader ("One-two punch. Brang-, not brung-it. This one--not that one--is the right one, -ish."); @@ -71,8 +69,8 @@ public void testMultipleIcuCustomizeRuleFiles() throws IOException { TestAnalysis analysis = createTestAnalysis(); // test the tokenizer with two rule files - Supplier tokenizerFactory = analysis.tokenizer.get("multi_rule_tokenizer"); - ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.get(); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("multi_rule_tokenizer"); + ICUTokenizer tokenizer = (ICUTokenizer) tokenizerFactory.create(); StringReader reader = new StringReader ("Some English. Немного русский. 
ข้อความภาษาไทยเล็ก ๆ น้อย ๆ More English."); diff --git a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java index edff9e86d4da0..3cd675c221a27 100644 --- a/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java +++ b/plugins/analysis-icu/src/test/java/org/elasticsearch/index/analysis/SimpleIcuAnalysisTests.java @@ -19,14 +19,12 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.plugin.analysis.icu.AnalysisICUPlugin; import org.elasticsearch.test.ESTestCase; import java.io.IOException; -import java.util.function.Supplier; import static org.hamcrest.Matchers.instanceOf; @@ -34,7 +32,7 @@ public class SimpleIcuAnalysisTests extends ESTestCase { public void testDefaultsIcuAnalysis() throws IOException { TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisICUPlugin()); - Supplier tokenizerFactory = analysis.tokenizer.get("icu_tokenizer"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("icu_tokenizer"); assertThat(tokenizerFactory, instanceOf(IcuTokenizerFactory.class)); TokenFilterFactory filterFactory = analysis.tokenFilter.get("icu_normalizer"); diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java index c4f927b4d5a09..e9268f7306512 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiTokenizerFactory.java @@ -86,7 +86,7 @@ public static JapaneseTokenizer.Mode getMode(Settings settings) { } @Override - public Tokenizer get() { + public Tokenizer create() { JapaneseTokenizer t = new JapaneseTokenizer(userDictionary, discartPunctuation, mode); int nBestCost = this.nBestCost; if (nBestExamples != null) { diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java index e662e11517d0d..c4b4db53c4a2c 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/plugin/analysis/kuromoji/AnalysisKuromojiPlugin.java @@ -20,7 +20,6 @@ package org.elasticsearch.plugin.analysis.kuromoji; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.JapaneseStopTokenFilterFactory; @@ -33,13 +32,13 @@ import org.elasticsearch.index.analysis.KuromojiReadingFormFilterFactory; import org.elasticsearch.index.analysis.KuromojiTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import 
java.util.HashMap; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.singletonMap; @@ -62,7 +61,7 @@ public Map> getTokenFilters() { } @Override - public Map>> getTokenizers() { + public Map> getTokenizers() { return singletonMap("kuromoji_tokenizer", KuromojiTokenizerFactory::new); } diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index df950237d5030..b1b23f7f1b6a4 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -38,7 +38,6 @@ import java.io.StringReader; import java.nio.file.Files; import java.nio.file.Path; -import java.util.function.Supplier; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; @@ -49,7 +48,7 @@ public class KuromojiAnalysisTests extends ESTestCase { public void testDefaultsKuromojiAnalysis() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("kuromoji_tokenizer"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_tokenizer"); assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class)); TokenFilterFactory filterFactory = analysis.tokenFilter.get("kuromoji_part_of_speech"); @@ -246,11 +245,11 @@ private String readFully(Reader reader) throws IOException { public void testKuromojiUserDict() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("kuromoji_user_dict"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_user_dict"); String source = "私は制限スピードを超える。"; String[] expected = new String[]{"私", "は", "制限スピード", "を", "超える"}; - Tokenizer tokenizer = tokenizerFactory.get(); + Tokenizer tokenizer = tokenizerFactory.create(); tokenizer.setReader(new StringReader(source)); assertSimpleTSOutput(tokenizer, expected); } @@ -258,39 +257,39 @@ public void testKuromojiUserDict() throws IOException { // fix #59 public void testKuromojiEmptyUserDict() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("kuromoji_empty_user_dict"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_empty_user_dict"); assertThat(tokenizerFactory, instanceOf(KuromojiTokenizerFactory.class)); } public void testNbestCost() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_cost"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_cost"); String source = "鳩山積み"; String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"}; - Tokenizer tokenizer = tokenizerFactory.get(); + Tokenizer tokenizer = tokenizerFactory.create(); tokenizer.setReader(new StringReader(source)); assertSimpleTSOutput(tokenizer, expected); } public void testNbestExample() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_examples"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_examples"); String source = "鳩山積み"; String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"}; - Tokenizer tokenizer = tokenizerFactory.get(); + 
Tokenizer tokenizer = tokenizerFactory.create(); tokenizer.setReader(new StringReader(source)); assertSimpleTSOutput(tokenizer, expected); } public void testNbestBothOptions() throws IOException { TestAnalysis analysis = createTestAnalysis(); - Supplier tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_both"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("kuromoji_nbest_both"); String source = "鳩山積み"; String[] expected = new String[] {"鳩", "鳩山", "山積み", "積み"}; - Tokenizer tokenizer = tokenizerFactory.get(); + Tokenizer tokenizer = tokenizerFactory.create(); tokenizer.setReader(new StringReader(source)); assertSimpleTSOutput(tokenizer, expected); diff --git a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java index 60ae7dc8bd660..9295ed95c3fb8 100644 --- a/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java +++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/index/analysis/NoriTokenizerFactory.java @@ -65,7 +65,7 @@ public static KoreanTokenizer.DecompoundMode getMode(Settings settings) { } @Override - public Tokenizer get() { + public Tokenizer create() { return new KoreanTokenizer(KoreanTokenizer.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, userDictionary, decompoundMode, false); } diff --git a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java index 4fc9a93fab96f..6e9baa7acd26c 100644 --- a/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java +++ b/plugins/analysis-nori/src/main/java/org/elasticsearch/plugin/analysis/nori/AnalysisNoriPlugin.java @@ -20,20 +20,19 @@ package org.elasticsearch.plugin.analysis.nori; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.NoriAnalyzerProvider; import org.elasticsearch.index.analysis.NoriPartOfSpeechStopFilterFactory; import org.elasticsearch.index.analysis.NoriReadingFormFilterFactory; import org.elasticsearch.index.analysis.NoriTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import java.util.HashMap; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.singletonMap; @@ -47,7 +46,7 @@ public Map> getTokenFilters() { } @Override - public Map>> getTokenizers() { + public Map> getTokenizers() { return singletonMap("nori_tokenizer", NoriTokenizerFactory::new); } diff --git a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java index a2202744440e1..fa5858a7bbbb8 100644 --- a/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java +++ b/plugins/analysis-nori/src/test/java/org/elasticsearch/index/analysis/NoriAnalysisTests.java @@ -37,7 +37,6 @@ import java.io.StringReader; import java.nio.file.Files; import java.nio.file.Path; -import java.util.function.Supplier; import 
static org.hamcrest.Matchers.instanceOf; @@ -45,7 +44,7 @@ public class NoriAnalysisTests extends ESTokenStreamTestCase { public void testDefaultsNoriAnalysis() throws IOException { TestAnalysis analysis = createTestAnalysis(Settings.EMPTY); - Supplier tokenizerFactory = analysis.tokenizer.get("nori_tokenizer"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("nori_tokenizer"); assertThat(tokenizerFactory, instanceOf(NoriTokenizerFactory.class)); TokenFilterFactory filterFactory = analysis.tokenFilter.get("nori_part_of_speech"); @@ -98,7 +97,7 @@ public void testNoriTokenizer() throws Exception { .put("index.analysis.tokenizer.my_tokenizer.decompound_mode", "mixed") .build(); TestAnalysis analysis = createTestAnalysis(settings); - Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").get(); + Tokenizer tokenizer = analysis.tokenizer.get("my_tokenizer").create(); tokenizer.setReader(new StringReader("뿌리가 깊은 나무")); assertTokenStreamContents(tokenizer, new String[] {"뿌리", "가", "깊", "은", "나무"}); tokenizer.setReader(new StringReader("가늠표")); diff --git a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java index 832e914bb2dda..560bce9db2701 100644 --- a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java +++ b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/index/analysis/SmartChineseTokenizerTokenizerFactory.java @@ -32,7 +32,7 @@ public SmartChineseTokenizerTokenizerFactory(IndexSettings indexSettings, Enviro } @Override - public Tokenizer get() { + public Tokenizer create() { return new HMMChineseTokenizer(); } } diff --git a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java index ad0b99b702c1c..b11a157c149d6 100644 --- a/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java +++ b/plugins/analysis-smartcn/src/main/java/org/elasticsearch/plugin/analysis/smartcn/AnalysisSmartChinesePlugin.java @@ -20,19 +20,18 @@ package org.elasticsearch.plugin.analysis.smartcn; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.SmartChineseAnalyzerProvider; import org.elasticsearch.index.analysis.SmartChineseNoOpTokenFilterFactory; import org.elasticsearch.index.analysis.SmartChineseTokenizerTokenizerFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import java.util.HashMap; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.singletonMap; @@ -44,8 +43,8 @@ public Map> getTokenFilters() { } @Override - public Map>> getTokenizers() { - Map>> extra = new HashMap<>(); + public Map> getTokenizers() { + Map> extra = new HashMap<>(); extra.put("smartcn_tokenizer", SmartChineseTokenizerTokenizerFactory::new); // This is an alias to "smartcn_tokenizer"; it's here for backwards compat 
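
Written out with full generics, the hook these analysis plugins implement is Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers(). A minimal sketch of a registration against the restored interface follows; the plugin class and the "my_whitespace" key are hypothetical names for illustration, not part of this change:

    import java.util.Map;

    import org.apache.lucene.analysis.core.WhitespaceTokenizer;
    import org.elasticsearch.index.analysis.TokenizerFactory;
    import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
    import org.elasticsearch.plugins.AnalysisPlugin;
    import org.elasticsearch.plugins.Plugin;

    import static java.util.Collections.singletonMap;

    public class MyAnalysisPlugin extends Plugin implements AnalysisPlugin {
        @Override
        public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
            // The outer lambda is the AnalysisProvider; the inner no-arg lambda is the
            // TokenizerFactory, which now has a single create() method instead of get().
            return singletonMap("my_whitespace",
                (indexSettings, environment, name, settings) -> () -> new WhitespaceTokenizer());
        }
    }

Because both AnalysisProvider and TokenizerFactory are single-method interfaces, this nested-lambda form and the constructor references used in the diffs (NoriTokenizerFactory::new, SmartChineseTokenizerTokenizerFactory::new) are interchangeable ways to build the provider.
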
extra.put("smartcn_sentence", SmartChineseTokenizerTokenizerFactory::new); diff --git a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java index 2bb0586b62b9d..e2d6f6db51376 100644 --- a/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java +++ b/plugins/analysis-smartcn/src/test/java/org/elasticsearch/index/analysis/SimpleSmartChineseAnalysisTests.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.plugin.analysis.smartcn.AnalysisSmartChinesePlugin; @@ -27,7 +26,6 @@ import org.hamcrest.MatcherAssert; import java.io.IOException; -import java.util.function.Supplier; import static org.hamcrest.Matchers.instanceOf; @@ -35,7 +33,7 @@ public class SimpleSmartChineseAnalysisTests extends ESTestCase { public void testDefaultsIcuAnalysis() throws IOException { final TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), Settings.EMPTY, new AnalysisSmartChinesePlugin()); - Supplier tokenizerFactory = analysis.tokenizer.get("smartcn_tokenizer"); + TokenizerFactory tokenizerFactory = analysis.tokenizer.get("smartcn_tokenizer"); MatcherAssert.assertThat(tokenizerFactory, instanceOf(SmartChineseTokenizerTokenizerFactory.class)); } } diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index 43b60b2207cfb..35f1f725b65ad 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -53,6 +53,7 @@ import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.KeywordFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.shard.ShardId; @@ -71,7 +72,6 @@ import java.util.Map; import java.util.Set; import java.util.TreeMap; -import java.util.function.Supplier; /** * Transport action used to execute analyze requests @@ -185,7 +185,7 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal } } else if (request.tokenizer() != null) { final IndexSettings indexSettings = indexAnalyzers == null ? 
null : indexAnalyzers.getIndexSettings(); - Tuple> tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, + Tuple tokenizerFactory = parseTokenizerFactory(request, indexAnalyzers, analysisRegistry, environment); List charFilterFactoryList = @@ -213,7 +213,7 @@ public static AnalyzeResponse analyze(AnalyzeRequest request, String field, Anal parseCharFilterFactories(request, indexSettings, analysisRegistry, environment, true); final String keywordTokenizerName = "keyword"; - Supplier keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); + TokenizerFactory keywordTokenizerFactory = getTokenizerFactory(analysisRegistry, environment, keywordTokenizerName); List tokenFilterFactoryList = parseTokenFilterFactories(request, indexSettings, analysisRegistry, environment, new Tuple<>(keywordTokenizerName, keywordTokenizerFactory), charFilterFactoryList, true); @@ -306,7 +306,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy if (customAnalyzer != null) { // customAnalyzer = divide charfilter, tokenizer tokenfilters CharFilterFactory[] charFilterFactories = customAnalyzer.charFilters(); - Supplier tokenizerFactory = customAnalyzer.tokenizerFactory(); + TokenizerFactory tokenizerFactory = customAnalyzer.tokenizerFactory(); TokenFilterFactory[] tokenFilterFactories = customAnalyzer.tokenFilters(); String[][] charFiltersTexts = new String[charFilterFactories != null ? charFilterFactories.length : 0][request.text().length]; @@ -330,7 +330,7 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy } // analyzing only tokenizer - Tokenizer tokenizer = tokenizerFactory.get(); + Tokenizer tokenizer = tokenizerFactory.create(); tokenizer.setReader(reader); tokenizerTokenListCreator.analyze(tokenizer, customAnalyzer, field, includeAttributes); @@ -381,12 +381,12 @@ private static DetailAnalyzeResponse detailAnalyze(AnalyzeRequest request, Analy return detailResponse; } - private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, Supplier tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) { + private static TokenStream createStackedTokenStream(String source, CharFilterFactory[] charFilterFactories, TokenizerFactory tokenizerFactory, TokenFilterFactory[] tokenFilterFactories, int current) { Reader reader = new StringReader(source); for (CharFilterFactory charFilterFactory : charFilterFactories) { reader = charFilterFactory.create(reader); } - Tokenizer tokenizer = tokenizerFactory.get(); + Tokenizer tokenizer = tokenizerFactory.create(); tokenizer.setReader(reader); TokenStream tokenStream = tokenizer; for (int i = 0; i < current; i++) { @@ -571,7 +571,7 @@ private static List parseCharFilterFactories(AnalyzeRequest r } private static List parseTokenFilterFactories(AnalyzeRequest request, IndexSettings indexSettings, AnalysisRegistry analysisRegistry, - Environment environment, Tuple> tokenizerFactory, + Environment environment, Tuple tokenizerFactory, List charFilterFactoryList, boolean normalizer) throws IOException { List tokenFilterFactoryList = new ArrayList<>(); if (request.tokenFilters() != null && request.tokenFilters().size() > 0) { @@ -632,10 +632,10 @@ private static List parseTokenFilterFactories(AnalyzeRequest return tokenFilterFactoryList; } - private static Tuple> parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers, - AnalysisRegistry analysisRegistry, Environment environment) throws 
IOException { + private static Tuple parseTokenizerFactory(AnalyzeRequest request, IndexAnalyzers indexAnalzyers, + AnalysisRegistry analysisRegistry, Environment environment) throws IOException { String name; - Supplier tokenizerFactory; + TokenizerFactory tokenizerFactory; final AnalyzeRequest.NameOrDefinition tokenizer = request.tokenizer(); // parse anonymous settings if (tokenizer.definition != null) { @@ -644,7 +644,7 @@ private static Tuple> parseTokenizerFactory(AnalyzeR if (tokenizerTypeName == null) { throw new IllegalArgumentException("Missing [type] setting for anonymous tokenizer: " + tokenizer.definition); } - AnalysisModule.AnalysisProvider> tokenizerFactoryFactory = + AnalysisModule.AnalysisProvider tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerTypeName); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find global tokenizer under [" + tokenizerTypeName + "]"); @@ -653,7 +653,7 @@ private static Tuple> parseTokenizerFactory(AnalyzeR name = "_anonymous_tokenizer"; tokenizerFactory = tokenizerFactoryFactory.get(getNaIndexSettings(settings), environment, "_anonymous_tokenizer", settings); } else { - AnalysisModule.AnalysisProvider> tokenizerFactoryFactory; + AnalysisModule.AnalysisProvider tokenizerFactoryFactory; if (indexAnalzyers == null) { tokenizerFactory = getTokenizerFactory(analysisRegistry, environment, tokenizer.name); name = tokenizer.name; @@ -671,9 +671,9 @@ private static Tuple> parseTokenizerFactory(AnalyzeR return new Tuple<>(name, tokenizerFactory); } - private static Supplier getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException { - AnalysisModule.AnalysisProvider> tokenizerFactoryFactory; - Supplier tokenizerFactory; + private static TokenizerFactory getTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment, String name) throws IOException { + AnalysisModule.AnalysisProvider tokenizerFactoryFactory; + TokenizerFactory tokenizerFactory; tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("failed to find global tokenizer under [" + name + "]"); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java index efb4bf6b61cae..4df0375f31cab 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AbstractTokenizerFactory.java @@ -19,15 +19,12 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.IndexSettings; -import java.util.function.Supplier; - -public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements Supplier { +public abstract class AbstractTokenizerFactory extends AbstractIndexComponent implements TokenizerFactory { protected final Version version; public AbstractTokenizerFactory(IndexSettings indexSettings, Settings settings) { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index bf560b2e83cb4..c61a7cf070680 100644 --- 
a/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.ElasticsearchException; @@ -40,7 +39,6 @@ import java.util.Locale; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import java.util.function.Supplier; import java.util.stream.Collectors; import static java.util.Collections.unmodifiableMap; @@ -59,14 +57,14 @@ public final class AnalysisRegistry implements Closeable { private final Environment environment; private final Map> charFilters; private final Map> tokenFilters; - private final Map>> tokenizers; + private final Map> tokenizers; private final Map>> analyzers; private final Map>> normalizers; public AnalysisRegistry(Environment environment, Map> charFilters, Map> tokenFilters, - Map>> tokenizers, + Map> tokenizers, Map>> analyzers, Map>> normalizers, Map preConfiguredCharFilters, @@ -98,9 +96,9 @@ public static Settings getSettingsFromIndexSettings(IndexSettings indexSettings, } /** - * Returns a registered {@link Tokenizer} provider by name or null if the tokenizer was not registered + * Returns a registered {@link TokenizerFactory} provider by name or null if the tokenizer was not registered */ - public AnalysisModule.AnalysisProvider> getTokenizerProvider(String tokenizer) { + public AnalysisModule.AnalysisProvider getTokenizerProvider(String tokenizer) { return tokenizers.getOrDefault(tokenizer, this.prebuiltAnalysis.getTokenizerFactory(tokenizer)); } @@ -151,7 +149,7 @@ public void close() throws IOException { public IndexAnalyzers build(IndexSettings indexSettings) throws IOException { final Map charFilterFactories = buildCharFilterFactories(indexSettings); - final Map> tokenizerFactories = buildTokenizerFactories(indexSettings); + final Map tokenizerFactories = buildTokenizerFactories(indexSettings); final Map tokenFilterFactories = buildTokenFilterFactories(indexSettings); final Map> analyzierFactories = buildAnalyzerFactories(indexSettings); final Map> normalizerFactories = buildNormalizerFactories(indexSettings); @@ -182,7 +180,7 @@ public Map buildTokenFilterFactories(IndexSettings i return mappings; } - public Map> buildTokenizerFactories(IndexSettings indexSettings) throws IOException { + public Map buildTokenizerFactories(IndexSettings indexSettings) throws IOException { final Map tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER); return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.preConfiguredTokenizers); } @@ -204,14 +202,14 @@ public Map> buildNormalizerFactories(IndexSettings i } /** - * Returns a registered {@link Tokenizer} provider by {@link IndexSettings} - * or a registered {@link Tokenizer} provider by predefined name + * Returns a registered {@link TokenizerFactory} provider by {@link IndexSettings} + * or a registered {@link TokenizerFactory} provider by predefined name * or null if the tokenizer was not registered * @param tokenizer global or defined tokenizer name * @param indexSettings an index settings - * @return {@link Tokenizer} provider or null + * @return {@link TokenizerFactory} provider or null */ - public AnalysisProvider> getTokenizerProvider(String 
tokenizer, IndexSettings indexSettings) { + public AnalysisProvider getTokenizerProvider(String tokenizer, IndexSettings indexSettings) { final Map tokenizerSettings = indexSettings.getSettings().getGroups("index.analysis.tokenizer"); if (tokenizerSettings.containsKey(tokenizer)) { Settings currentSettings = tokenizerSettings.get(tokenizer); @@ -406,7 +404,7 @@ private static class PrebuiltAnalysis implements Closeable { final Map>> analyzerProviderFactories; final Map> preConfiguredTokenFilters; - final Map>> preConfiguredTokenizers; + final Map> preConfiguredTokenizers; final Map> preConfiguredCharFilterFactories; private PrebuiltAnalysis( @@ -437,7 +435,7 @@ public AnalysisModule.AnalysisProvider getTokenFilterFactory return preConfiguredTokenFilters.get(name); } - public AnalysisModule.AnalysisProvider> getTokenizerFactory(String name) { + public AnalysisModule.AnalysisProvider getTokenizerFactory(String name) { return preConfiguredTokenizers.get(name); } @@ -455,7 +453,7 @@ public void close() throws IOException { public IndexAnalyzers build(IndexSettings indexSettings, Map> analyzerProviders, Map> normalizerProviders, - Map> tokenizerFactoryFactories, + Map tokenizerFactoryFactories, Map charFilterFactoryFactories, Map tokenFilterFactoryFactories) { @@ -509,7 +507,7 @@ private void processAnalyzerFactory(IndexSettings indexSettings, String name, AnalyzerProvider analyzerFactory, Map analyzers, Map tokenFilters, - Map charFilters, Map> tokenizers) { + Map charFilters, Map tokenizers) { /* * Lucene defaults positionIncrementGap to 0 in all analyzers but * Elasticsearch defaults them to 0 only before version 2.0 @@ -559,7 +557,7 @@ private void processNormalizerFactory( AnalyzerProvider normalizerFactory, Map normalizers, String tokenizerName, - Supplier tokenizerFactory, + TokenizerFactory tokenizerFactory, Map tokenFilters, Map charFilters) { if (tokenizerFactory == null) { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java index 533c7fbf964fd..d70b4628f532c 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java @@ -24,12 +24,11 @@ import org.apache.lucene.analysis.Tokenizer; import java.io.Reader; -import java.util.function.Supplier; public final class CustomAnalyzer extends Analyzer { private final String tokenizerName; - private final Supplier tokenizerFactory; + private final TokenizerFactory tokenizerFactory; private final CharFilterFactory[] charFilters; @@ -38,13 +37,13 @@ public final class CustomAnalyzer extends Analyzer { private final int positionIncrementGap; private final int offsetGap; - public CustomAnalyzer(String tokenizerName, Supplier tokenizerFactory, CharFilterFactory[] charFilters, + public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, TokenFilterFactory[] tokenFilters) { this(tokenizerName, tokenizerFactory, charFilters, tokenFilters, 0, -1); } - public CustomAnalyzer(String tokenizerName, Supplier tokenizerFactory, CharFilterFactory[] charFilters, - TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) { + public CustomAnalyzer(String tokenizerName, TokenizerFactory tokenizerFactory, CharFilterFactory[] charFilters, + TokenFilterFactory[] tokenFilters, int positionIncrementGap, int offsetGap) { this.tokenizerName = tokenizerName; this.tokenizerFactory = 
tokenizerFactory; this.charFilters = charFilters; @@ -60,7 +59,7 @@ public String getTokenizerName() { return tokenizerName; } - public Supplier tokenizerFactory() { + public TokenizerFactory tokenizerFactory() { return tokenizerFactory; } @@ -87,7 +86,7 @@ public int getOffsetGap(String field) { @Override protected TokenStreamComponents createComponents(String fieldName) { - Tokenizer tokenizer = tokenizerFactory.get(); + Tokenizer tokenizer = tokenizerFactory.create(); TokenStream tokenStream = tokenizer; for (TokenFilterFactory tokenFilter : tokenFilters) { tokenStream = tokenFilter.create(tokenStream); diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java index 96d1745e336ff..4ba078051640a 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzerProvider.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; @@ -28,7 +27,6 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.function.Supplier; /** * A custom analyzer that is built out of a single {@link org.apache.lucene.analysis.Tokenizer} and a list @@ -48,14 +46,14 @@ public CustomAnalyzerProvider(IndexSettings indexSettings, this.environment = environment; } - public void build(final Map> tokenizers, final Map charFilters, + public void build(final Map tokenizers, final Map charFilters, final Map tokenFilters) { String tokenizerName = analyzerSettings.get("tokenizer"); if (tokenizerName == null) { throw new IllegalArgumentException("Custom Analyzer [" + name() + "] must be configured with a tokenizer"); } - Supplier tokenizer = tokenizers.get(tokenizerName); + TokenizerFactory tokenizer = tokenizers.get(tokenizerName); if (tokenizer == null) { throw new IllegalArgumentException("Custom Analyzer [" + name() + "] failed to find tokenizer under name [" + tokenizerName + "]"); } @@ -97,7 +95,7 @@ public void build(final Map> tokenizers, final Map tokenizer, + public static TokenFilterFactory checkAndApplySynonymFilter(TokenFilterFactory tokenFilter, String tokenizerName, TokenizerFactory tokenizer, List tokenFilterList, List charFiltersList, Environment env) { if (tokenFilter instanceof SynonymGraphTokenFilterFactory) { diff --git a/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java b/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java index 4a24bad505f17..13946be3a8d22 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/CustomNormalizerProvider.java @@ -19,14 +19,12 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexSettings; import java.util.ArrayList; import java.util.List; import java.util.Map; -import java.util.function.Supplier; /** * A custom normalizer that is built out of a char and token filters. 
On the @@ -45,8 +43,8 @@ public CustomNormalizerProvider(IndexSettings indexSettings, this.analyzerSettings = settings; } - public void build(final String tokenizerName, final Supplier tokenizerFactory, - final Map charFilters, final Map tokenFilters) { + public void build(final String tokenizerName, final TokenizerFactory tokenizerFactory, final Map charFilters, + final Map tokenFilters) { if (analyzerSettings.get("tokenizer") != null) { throw new IllegalArgumentException("Custom normalizer [" + name() + "] cannot configure a tokenizer"); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java index f3e55a4d0c2b6..a38228a86f734 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java @@ -31,7 +31,7 @@ /** * Provides pre-configured, shared {@link Tokenizer}s. */ -public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent> { +public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent { /** * Create a pre-configured tokenizer that may not vary at all. * @@ -90,14 +90,14 @@ public boolean hasMultiTermComponent() { return multiTermComponent != null; } - private interface MultiTermAwareTokenizerFactory extends Supplier, MultiTermAwareComponent {} + private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {} @Override - protected Supplier create(Version version) { + protected TokenizerFactory create(Version version) { if (multiTermComponent != null) { return new MultiTermAwareTokenizerFactory() { @Override - public Tokenizer get() { + public Tokenizer create() { return create.apply(version); } diff --git a/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java index 1df5b212994eb..2e4473f3b0e6d 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/StandardTokenizerFactory.java @@ -36,7 +36,7 @@ public StandardTokenizerFactory(IndexSettings indexSettings, Environment environ } @Override - public Tokenizer get() { + public Tokenizer create() { StandardTokenizer tokenizer = new StandardTokenizer(); tokenizer.setMaxTokenLength(maxTokenLength); return tokenizer; diff --git a/server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java b/server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java new file mode 100644 index 0000000000000..4abed5a62ce71 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/index/analysis/TokenizerFactory.java @@ -0,0 +1,26 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.Tokenizer; + +public interface TokenizerFactory { + Tokenizer create(); +} diff --git a/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 31591bc77dfb8..1ecdc797073cf 100644 --- a/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/server/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -20,7 +20,6 @@ package org.elasticsearch.indices.analysis; import org.apache.lucene.analysis.LowerCaseFilter; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.standard.StandardFilter; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -45,6 +44,7 @@ import org.elasticsearch.index.analysis.StopAnalyzerProvider; import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider; import org.elasticsearch.plugins.AnalysisPlugin; @@ -52,7 +52,6 @@ import java.util.List; import java.util.Locale; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.unmodifiableMap; import static org.elasticsearch.plugins.AnalysisPlugin.requiresAnalysisSettings; @@ -78,7 +77,7 @@ public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException { NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = setupHunspellDictionaries(plugins); hunspellService = new HunspellService(environment.settings(), environment, hunspellDictionaries.getRegistry()); NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService); - NamedRegistry<AnalysisProvider<Supplier<Tokenizer>>> tokenizers = setupTokenizers(plugins); + NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins); NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins); NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins); @@ -195,8 +194,8 @@ static Map<String, PreConfiguredTokenizer> setupPreConfiguredTokenizers(List<AnalysisPlugin> plugins) { - private NamedRegistry<AnalysisProvider<Supplier<Tokenizer>>> setupTokenizers(List<AnalysisPlugin> plugins) { - NamedRegistry<AnalysisProvider<Supplier<Tokenizer>>> tokenizers = new NamedRegistry<>("tokenizer"); + private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) { + NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = new NamedRegistry<>("tokenizer"); tokenizers.register("standard", StandardTokenizerFactory::new); tokenizers.extractAndRegister(plugins, AnalysisPlugin::getTokenizers); return tokenizers; diff --git a/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java b/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java index 5ed2b82966096..c85981f8dcb91 100644 --- a/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java +++ b/server/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java @@ -33,12 +33,12 @@ import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import
org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; @@ -81,7 +81,7 @@ default Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { * Override to add additional {@link Tokenizer}s. See {@link #requiresAnalysisSettings(AnalysisProvider)} * how to on get the configuration from the index. */ - default Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { + default Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { return emptyMap(); } diff --git a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java index 2c71cb5fb8c8c..c0404a47ab237 100644 --- a/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/admin/indices/TransportAnalyzeActionTests.java @@ -21,7 +21,6 @@ import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; @@ -39,6 +38,7 @@ import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.PreConfiguredCharFilter; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.indices.analysis.AnalysisModuleTests.AppendCharFilter; @@ -50,7 +50,6 @@ import java.io.Reader; import java.util.List; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -111,7 +110,7 @@ public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() { } @Override - public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { + public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { return singletonMap("keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); } diff --git a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java index 02d06d7fc289c..1dcb2d4e39fd6 100644 --- a/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java +++ b/server/src/test/java/org/elasticsearch/index/analysis/CustomNormalizerTests.java @@ -21,7 +21,6 @@ import org.apache.lucene.analysis.MockLowerCaseFilter; import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -35,7 +34,6 @@ import java.util.List; import java.util.Map; import java.util.function.Function; -import java.util.function.Supplier; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -174,7 +172,7 @@ public Object getMultiTermComponent() { } @Override - public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { + public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { return singletonMap("keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD,
false)); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java index 5f9e54c8990bc..56e587dc995da 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/KeywordFieldMapperTests.java @@ -35,6 +35,7 @@ import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugins.AnalysisPlugin; @@ -48,7 +49,6 @@ import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.singletonList; import static org.apache.lucene.analysis.BaseTokenStreamTestCase.assertTokenStreamContents; @@ -68,10 +68,17 @@ public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() { } @Override - public Map<String, AnalysisModule.AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { - return singletonMap( - "keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false) - ); + public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() { + return singletonMap("keyword", (indexSettings, environment, name, settings) -> { + class Factory implements TokenizerFactory { + + @Override + public Tokenizer create() { + return new MockTokenizer(MockTokenizer.KEYWORD, false); + } + } + return new Factory(); + }); } }; diff --git a/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 0b528b0189e94..47f30e10ef912 100644 --- a/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/server/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -51,6 +51,7 @@ import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.MyFilterTokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; @@ -71,7 +72,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.function.Supplier; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; @@ -263,7 +263,7 @@ public List<PreConfiguredCharFilter> getPreConfiguredCharFilters() { } @Override - public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { + public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { // Need mock keyword tokenizer here, because alpha / beta versions are broken up by the dash.
return singletonMap("keyword", (indexSettings, environment, name, settings) -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java index d90bc62449942..9cdfc6776f883 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESTestCase.java @@ -37,7 +37,6 @@ import org.apache.logging.log4j.status.StatusConsoleListener; import org.apache.logging.log4j.status.StatusData; import org.apache.logging.log4j.status.StatusLogger; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.TestRuleMarkFailure; @@ -88,6 +87,7 @@ import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.index.mapper.MetadataFieldMapper; import org.elasticsearch.indices.IndicesModule; @@ -1349,12 +1349,12 @@ public static final class TestAnalysis { public final IndexAnalyzers indexAnalyzers; public final Map<String, TokenFilterFactory> tokenFilter; - public final Map<String, Supplier<Tokenizer>> tokenizer; + public final Map<String, TokenizerFactory> tokenizer; public final Map<String, CharFilterFactory> charFilter; public TestAnalysis(IndexAnalyzers indexAnalyzers, Map<String, TokenFilterFactory> tokenFilter, - Map<String, Supplier<Tokenizer>> tokenizer, + Map<String, TokenizerFactory> tokenizer, Map<String, CharFilterFactory> charFilter) { this.indexAnalyzers = indexAnalyzers; this.tokenFilter = tokenFilter; diff --git a/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java b/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java index 3754e580e9349..fb9da1dad40fd 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java +++ b/test/framework/src/main/java/org/elasticsearch/test/MockKeywordPlugin.java @@ -20,12 +20,12 @@ import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.plugins.Plugin; import java.util.Map; -import java.util.function.Supplier; import static java.util.Collections.singletonMap; @@ -39,8 +39,16 @@ public class MockKeywordPlugin extends Plugin implements AnalysisPlugin { @Override - public Map<String, AnalysisModule.AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { - return singletonMap("keyword", (indexSettings, environment, name, settings) - -> () -> new MockTokenizer(MockTokenizer.KEYWORD, false)); + public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() { + return singletonMap("keyword", (indexSettings, environment, name, settings) -> { + class Factory implements TokenizerFactory { + + @Override + public Tokenizer create() { + return new MockTokenizer(MockTokenizer.KEYWORD, false); + } + } + return new Factory(); + }); } } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java index 58ce9ca7e5ea4..1c2808c70ffcf 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java +++
b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/ml/job/config/CategorizationAnalyzerConfig.java @@ -6,7 +6,6 @@ package org.elasticsearch.xpack.core.ml.job.config; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.Version; import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -30,6 +29,7 @@ import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.CustomAnalyzerProvider; import org.elasticsearch.index.analysis.TokenFilterFactory; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.rest.action.admin.indices.RestAnalyzeAction; import org.elasticsearch.xpack.core.ml.MlParserType; @@ -41,7 +41,6 @@ import java.util.List; import java.util.Map; import java.util.Objects; -import java.util.function.Supplier; /** @@ -369,7 +368,7 @@ public Tuple<Analyzer, Boolean> toAnalyzer(AnalysisRegistry analysisRegistry, En List<CharFilterFactory> charFilterFactoryList = parseCharFilterFactories(analysisRegistry, environment); - Tuple<String, Supplier<Tokenizer>> tokenizerFactory = parseTokenizerFactory(analysisRegistry, + Tuple<String, TokenizerFactory> tokenizerFactory = parseTokenizerFactory(analysisRegistry, environment); List<TokenFilterFactory> tokenFilterFactoryList = parseTokenFilterFactories(analysisRegistry, @@ -425,13 +424,13 @@ private List<CharFilterFactory> parseCharFilterFactories(AnalysisRegistry analys * Get the tokenizer factory for the configured tokenizer. The configuration * can be the name of an out-of-the-box tokenizer, or a custom definition. */ - private Tuple<String, Supplier<Tokenizer>> parseTokenizerFactory(AnalysisRegistry analysisRegistry, + private Tuple<String, TokenizerFactory> parseTokenizerFactory(AnalysisRegistry analysisRegistry, Environment environment) throws IOException { final String name; - final Supplier<Tokenizer> tokenizerFactory; + final TokenizerFactory tokenizerFactory; if (tokenizer.name != null) { name = tokenizer.name; - AnalysisModule.AnalysisProvider<Supplier<Tokenizer>> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); + AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(name); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("Failed to find global tokenizer under [" + name + "]"); } @@ -441,7 +440,7 @@ if (tokenizerTypeName == null) { throw new IllegalArgumentException("Missing [type] setting for tokenizer: " + tokenizer.definition); } - AnalysisModule.AnalysisProvider<Supplier<Tokenizer>> tokenizerFactoryFactory = + AnalysisModule.AnalysisProvider<TokenizerFactory> tokenizerFactoryFactory = analysisRegistry.getTokenizerProvider(tokenizerTypeName); if (tokenizerFactoryFactory == null) { throw new IllegalArgumentException("Failed to find global tokenizer under [" + tokenizerTypeName + "]"); } @@ -459,7 +458,7 @@ private Tuple<String, Supplier<Tokenizer>> parseTokenizerFactory(AnalysisRegistr * element can be the name of an out-of-the-box token filter, or a custom definition.
*/ private List<TokenFilterFactory> parseTokenFilterFactories(AnalysisRegistry analysisRegistry, Environment environment, - Tuple<String, Supplier<Tokenizer>> tokenizerFactory, + Tuple<String, TokenizerFactory> tokenizerFactory, List<CharFilterFactory> charFilterFactoryList) throws IOException { final List<TokenFilterFactory> tokenFilterFactoryList = new ArrayList<>(); for (NameOrDefinition tokenFilter : tokenFilters) { diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java index f2092ead39226..796cae375e3a6 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/LocalStateCompositeXPackPlugin.java @@ -5,7 +5,6 @@ */ package org.elasticsearch.xpack.core; -import org.apache.lucene.analysis.Tokenizer; import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.action.support.ActionFilter; @@ -34,6 +33,7 @@ import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.http.HttpServerTransport; import org.elasticsearch.index.IndexModule; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.ingest.Processor; @@ -337,8 +337,8 @@ public UnaryOperator<Map<String, IndexTemplateMetaData>> getIndexTemplateMetaDat } @Override - public Map<String, AnalysisModule.AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { - Map<String, AnalysisModule.AnalysisProvider<Supplier<Tokenizer>>> tokenizers = new HashMap<>(); + public Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> getTokenizers() { + Map<String, AnalysisModule.AnalysisProvider<TokenizerFactory>> tokenizers = new HashMap<>(); filterPlugins(AnalysisPlugin.class).stream().forEach(p -> tokenizers.putAll(p.getTokenizers())); return tokenizers; } diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java index b3d914b0cf3f8..3d1011c47e2a8 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/MachineLearning.java @@ -6,7 +6,6 @@ package org.elasticsearch.xpack.ml; import org.apache.logging.log4j.Logger; -import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.SetOnce; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.Version; @@ -36,6 +35,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.monitor.os.OsProbe; @@ -581,7 +581,7 @@ public List<ExecutorBuilder<?>> getExecutorBuilders(Settings settings) { } @Override - public Map<String, AnalysisProvider<Supplier<Tokenizer>>> getTokenizers() { + public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() { return Collections.singletonMap(MlClassicTokenizer.NAME, MlClassicTokenizerFactory::new); } @Override diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java index 24ee6634d4f6b..95cba4f2dccb5 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/categorization/MlClassicTokenizerFactory.java @@ -24,7 +24,7 @@ public
MlClassicTokenizerFactory(IndexSettings indexSettings, Environment enviro } @Override - public Tokenizer get() { + public Tokenizer create() { return new MlClassicTokenizer(); } } From 2938ab43c0386cc6d1b74a91e218c9107893e9af Mon Sep 17 00:00:00 2001 From: Armin Date: Mon, 16 Jul 2018 16:07:13 +0200 Subject: [PATCH 3/3] Revert converting TokenizerFactory to Supplier --- .../index/analysis/PreConfiguredTokenizer.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java index a38228a86f734..131246d0b766a 100644 --- a/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java +++ b/server/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java @@ -34,7 +34,7 @@ public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent<TokenizerFactory> { /** * Create a pre-configured tokenizer that may not vary at all. - * + * * @param name the name of the tokenizer in the api * @param create builds the tokenizer * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the @@ -48,7 +48,7 @@ public static PreConfiguredTokenizer singleton(String name, Supplier /** * Create a pre-configured tokenizer that may vary based on the Lucene version. - * + * * @param name the name of the tokenizer in the api * @param create builds the tokenizer * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the @@ -62,7 +62,7 @@ public static PreConfiguredTokenizer luceneVersion(String name, Function private final Function<Version, Tokenizer> create; private final Function<Version, TokenFilterFactory> multiTermComponent; - + private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function<Version, Tokenizer> create, @Nullable Function<Version, TokenFilterFactory> multiTermComponent) { super(name, cache);
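
Note on usage: after this series a tokenizer is contributed to the analysis registry as an AnalysisProvider<TokenizerFactory> rather than an AnalysisProvider<Supplier<Tokenizer>>. A minimal sketch of a plugin-side registration against the restored interface follows; the plugin class and the "example_whitespace" tokenizer name are hypothetical, chosen only for illustration.

import java.util.Map;

import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.plugins.Plugin;

import static java.util.Collections.singletonMap;

// Hypothetical plugin, mirroring the pattern used by MockKeywordPlugin above.
public class ExampleAnalysisPlugin extends Plugin implements AnalysisPlugin {

    @Override
    public Map<String, AnalysisProvider<TokenizerFactory>> getTokenizers() {
        // The outer lambda is the AnalysisProvider, invoked with the index settings;
        // the inner no-arg lambda is the TokenizerFactory itself.
        return singletonMap("example_whitespace",
                (indexSettings, environment, name, settings) -> () -> new WhitespaceTokenizer());
    }
}

Because TokenizerFactory keeps a single create() method it remains a functional interface, which is why the one-liner lambda registrations in the test plugins above still compile unchanged; any per-index configuration has to be captured when the provider runs, from its settings argument, since create() itself takes no arguments.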