From 1375e5a215ab9c3dce2a3441a6e2715efde35817 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 May 2017 07:38:49 -0400 Subject: [PATCH 1/6] Allow plugins to build pre-configured tokenizers --- .../index/analysis/AnalysisRegistry.java | 38 ++---- .../PreBuiltTokenizerFactoryFactory.java | 50 ------- .../PreConfiguredAnalysisComponent.java | 64 +++++++++ .../analysis/PreConfiguredTokenFilter.java | 91 +++++-------- .../analysis/PreConfiguredTokenizer.java | 128 ++++++++++++++++++ .../indices/analysis/AnalysisModule.java | 36 ++++- .../indices/analysis/PreBuiltTokenizers.java | 123 ++++++++--------- .../elasticsearch/plugins/AnalysisPlugin.java | 14 +- .../elasticsearch/index/IndexModuleTests.java | 3 +- .../index/analysis/AnalysisRegistryTests.java | 22 ++- .../analysis/common/CommonAnalysisPlugin.java | 21 +++ .../common/CommonAnalysisFactoryTests.java | 7 + .../analysis/AnalysisFactoryTestCase.java | 102 ++++++++------ 13 files changed, 436 insertions(+), 263 deletions(-) delete mode 100644 core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenizerFactoryFactory.java create mode 100644 core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java create mode 100644 core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index b438cd5af4155..5d099267c79e8 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -74,14 +74,15 @@ public AnalysisRegistry(Environment environment, Map> tokenizers, Map>> analyzers, Map>> normalizers, - Map preConfiguredTokenFilters) { + Map preConfiguredTokenFilters, + Map preConfiguredTokenizers) { this.environment = environment; this.charFilters = unmodifiableMap(charFilters); this.tokenFilters = unmodifiableMap(tokenFilters); this.tokenizers = unmodifiableMap(tokenizers); this.analyzers = unmodifiableMap(analyzers); this.normalizers = unmodifiableMap(normalizers); - prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredTokenFilters); + prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredTokenFilters, preConfiguredTokenizers); } /** @@ -169,12 +170,12 @@ public Map buildTokenFilterFactories(IndexSettings i */ tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings))); - return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories); + return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters); } public Map buildTokenizerFactories(IndexSettings indexSettings) throws IOException { final Map tokenizersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_TOKENIZER); - return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.tokenizerFactories); + return buildMapping(Component.TOKENIZER, indexSettings, tokenizersSettings, tokenizers, prebuiltAnalysis.preConfiguredTokenizers); } public Map buildCharFilterFactories(IndexSettings indexSettings) throws IOException { @@ -394,31 +395,22 @@ private AnalysisProvider getAnalysisProvider(Component component, Map>> analyzerProviderFactories; - final Map> tokenizerFactories; - final Map> tokenFilterFactories; + final Map> preConfiguredTokenFilters; + final Map> preConfiguredTokenizers; final Map> charFilterFactories; - private PrebuiltAnalysis(Map preConfiguredTokenFilters) { + private PrebuiltAnalysis( + Map preConfiguredTokenFilters, + Map preConfiguredTokenizers) { Map analyzerProviderFactories = new HashMap<>(); - Map tokenizerFactories = new HashMap<>(); Map charFilterFactories = new HashMap<>(); + // Analyzers for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) { String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT); analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT))); } - // Tokenizers - for (PreBuiltTokenizers preBuiltTokenizer : PreBuiltTokenizers.values()) { - String name = preBuiltTokenizer.name().toLowerCase(Locale.ROOT); - tokenizerFactories.put(name, new PreBuiltTokenizerFactoryFactory(preBuiltTokenizer.getTokenizerFactory(Version.CURRENT))); - } - - // Tokenizer aliases - tokenizerFactories.put("nGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.NGRAM.getTokenizerFactory(Version.CURRENT))); - tokenizerFactories.put("edgeNGram", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.EDGE_NGRAM.getTokenizerFactory(Version.CURRENT))); - tokenizerFactories.put("PathHierarchy", new PreBuiltTokenizerFactoryFactory(PreBuiltTokenizers.PATH_HIERARCHY.getTokenizerFactory(Version.CURRENT))); - // Char Filters for (PreBuiltCharFilters preBuiltCharFilter : PreBuiltCharFilters.values()) { String name = preBuiltCharFilter.name().toLowerCase(Locale.ROOT); @@ -429,8 +421,8 @@ private PrebuiltAnalysis(Map preConfiguredToke this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories); this.charFilterFactories = Collections.unmodifiableMap(charFilterFactories); - this.tokenizerFactories = Collections.unmodifiableMap(tokenizerFactories); - tokenFilterFactories = preConfiguredTokenFilters; + this.preConfiguredTokenFilters = preConfiguredTokenFilters; + this.preConfiguredTokenizers = preConfiguredTokenizers; } public AnalysisModule.AnalysisProvider getCharFilterFactory(String name) { @@ -438,11 +430,11 @@ public AnalysisModule.AnalysisProvider getCharFilterFactory(S } public AnalysisModule.AnalysisProvider getTokenFilterFactory(String name) { - return tokenFilterFactories.get(name); + return preConfiguredTokenFilters.get(name); } public AnalysisModule.AnalysisProvider getTokenizerFactory(String name) { - return tokenizerFactories.get(name); + return preConfiguredTokenizers.get(name); } public AnalysisModule.AnalysisProvider> getAnalyzerProvider(String name) { diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenizerFactoryFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenizerFactoryFactory.java deleted file mode 100644 index 02218bd7cebc0..0000000000000 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreBuiltTokenizerFactoryFactory.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.analysis; - -import org.elasticsearch.Version; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.indices.analysis.AnalysisModule; -import org.elasticsearch.indices.analysis.PreBuiltTokenizers; - -import java.io.IOException; - -public class PreBuiltTokenizerFactoryFactory implements AnalysisModule.AnalysisProvider { - - private final TokenizerFactory tokenizerFactory; - - public PreBuiltTokenizerFactoryFactory(TokenizerFactory tokenizerFactory) { - this.tokenizerFactory = tokenizerFactory; - } - - public TokenizerFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { - Version indexVersion = Version.indexCreated(settings); - if (!Version.CURRENT.equals(indexVersion)) { - PreBuiltTokenizers preBuiltTokenizers = PreBuiltTokenizers.getOrDefault(name, null); - if (preBuiltTokenizers != null) { - return preBuiltTokenizers.getTokenizerFactory(indexVersion); - } - } - - return tokenizerFactory; - } -} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java new file mode 100644 index 0000000000000..c5fa406297df3 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java @@ -0,0 +1,64 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.elasticsearch.Version; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.indices.analysis.AnalysisModule; +import org.elasticsearch.indices.analysis.PreBuiltCacheFactory; + +import java.io.IOException; + +/** + * Shared implementation for pre-configured analysis components. + */ +abstract class PreConfiguredAnalysisComponent implements AnalysisModule.AnalysisProvider { + private final String name; + private final PreBuiltCacheFactory.PreBuiltCache cache; + + protected PreConfiguredAnalysisComponent(String name, PreBuiltCacheFactory.CachingStrategy cache) { + this.name = name; + this.cache = PreBuiltCacheFactory.getCache(cache); + } + + @Override + public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { + Version versionCreated = Version.indexCreated(settings); + synchronized (this) { + T factory = cache.get(versionCreated); + if (factory == null) { + factory = create(versionCreated); + cache.put(versionCreated, factory); + } + return factory; + } + } + + /** + * The name of the analysis component in the API. + */ + public String getName() { + return name; + } + + protected abstract T create(final Version version); +} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java index 1d9e4459c7e50..777fb589c9db0 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenFilter.java @@ -22,21 +22,16 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.elasticsearch.Version; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.env.Environment; -import org.elasticsearch.index.IndexSettings; -import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.indices.analysis.PreBuiltCacheFactory; import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; -import java.io.IOException; import java.util.function.BiFunction; import java.util.function.Function; /** * Provides pre-configured, shared {@link TokenFilter}s. */ -public final class PreConfiguredTokenFilter implements AnalysisModule.AnalysisProvider { +public final class PreConfiguredTokenFilter extends PreConfiguredAnalysisComponent { /** * Create a pre-configured token filter that may not vary at all. */ @@ -60,35 +55,19 @@ public static PreConfiguredTokenFilter luceneVersion(String name, boolean useFil */ public static PreConfiguredTokenFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries, BiFunction create) { - return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, CachingStrategy.ELASTICSEARCH, - (tokenStream, version) -> create.apply(tokenStream, version)); + return new PreConfiguredTokenFilter(name, useFilterForMultitermQueries, CachingStrategy.ELASTICSEARCH, create); } - private final String name; private final boolean useFilterForMultitermQueries; - private final PreBuiltCacheFactory.PreBuiltCache cache; private final BiFunction create; private PreConfiguredTokenFilter(String name, boolean useFilterForMultitermQueries, PreBuiltCacheFactory.CachingStrategy cache, BiFunction create) { - this.name = name; + super(name, cache); this.useFilterForMultitermQueries = useFilterForMultitermQueries; - this.cache = PreBuiltCacheFactory.getCache(cache); this.create = create; } - @Override - public TokenFilterFactory get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { - return getTokenFilterFactory(Version.indexCreated(settings)); - } - - /** - * The name of the {@link TokenFilter} in the API. - */ - public String getName() { - return name; - } - /** * Can this {@link TokenFilter} be used in multi-term queries? */ @@ -98,42 +77,36 @@ public boolean shouldUseFilterForMultitermQueries() { private interface MultiTermAwareTokenFilterFactory extends TokenFilterFactory, MultiTermAwareComponent {} - private synchronized TokenFilterFactory getTokenFilterFactory(final Version version) { - TokenFilterFactory factory = cache.get(version); - if (factory == null) { - if (useFilterForMultitermQueries) { - factory = new MultiTermAwareTokenFilterFactory() { - @Override - public String name() { - return name; - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return create.apply(tokenStream, version); - } - - @Override - public Object getMultiTermComponent() { - return this; - } - }; - } else { - factory = new TokenFilterFactory() { - @Override - public String name() { - return name; - } - - @Override - public TokenStream create(TokenStream tokenStream) { - return create.apply(tokenStream, version); - } - }; - } - cache.put(version, factory); + @Override + protected TokenFilterFactory create(Version version) { + if (useFilterForMultitermQueries) { + return new MultiTermAwareTokenFilterFactory() { + @Override + public String name() { + return getName(); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return create.apply(tokenStream, version); + } + + @Override + public Object getMultiTermComponent() { + return this; + } + }; } + return new TokenFilterFactory() { + @Override + public String name() { + return getName(); + } - return factory; + @Override + public TokenStream create(TokenStream tokenStream) { + return create.apply(tokenStream, version); + } + }; } } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java new file mode 100644 index 0000000000000..f2386a9a2732d --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java @@ -0,0 +1,128 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.Tokenizer; +import org.elasticsearch.Version; +import org.elasticsearch.common.Nullable; +import org.elasticsearch.indices.analysis.PreBuiltCacheFactory; +import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; + +import java.util.function.Function; +import java.util.function.Supplier; + +/** + * Provides pre-configured, shared {@link Tokenizer}s. + */ +public final class PreConfiguredTokenizer extends PreConfiguredAnalysisComponent { + /** + * Create a pre-configured tokenizer that may not vary at all. + * + * @param name the name of the tokenizer in the api + * @param create builds the tokenizer + * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the + * {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries. + */ + public static PreConfiguredTokenizer singleton(String name, Supplier create, + @Nullable Supplier multiTermComponent) { + return new PreConfiguredTokenizer(name, CachingStrategy.ONE, version -> create.get(), + multiTermComponent == null ? null : version -> multiTermComponent.get()); + } + + /** + * Create a pre-configured tokenizer that may vary based on the Lucene version. + * + * @param name the name of the tokenizer in the api + * @param create builds the tokenizer + * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the + * {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries. + */ + public static PreConfiguredTokenizer luceneVersion(String name, Function create, + @Nullable Function multiTermComponent) { + return new PreConfiguredTokenizer(name, CachingStrategy.LUCENE, version -> create.apply(version.luceneVersion), + multiTermComponent == null ? null : version -> multiTermComponent.apply(version.luceneVersion)); + } + + /** + * Create a pre-configured tokenizer that may vary based on the Elasticsearch version. + * + * @param name the name of the tokenizer in the api + * @param create builds the tokenizer + * @param multiTermComponent null if this tokenizer shouldn't be used for multi-term queries, otherwise a supplier for the + * {@link TokenFilterFactory} that stands in for this tokenizer in multi-term queries. + */ + public static PreConfiguredTokenizer elasticsearchVersion(String name, boolean useFilterForMultitermQueries, + Function create, @Nullable Function multiTermComponent) { + return new PreConfiguredTokenizer(name, CachingStrategy.ELASTICSEARCH, create, multiTermComponent); + } + + private final Function create; + private final Function multiTermComponent; + + private PreConfiguredTokenizer(String name, PreBuiltCacheFactory.CachingStrategy cache, Function create, + @Nullable Function multiTermComponent) { + super(name, cache); + this.create = create; + this.multiTermComponent = multiTermComponent; + } + + /** + * Does this tokenizer has an equivalent component for analyzing multi-term queries? + */ + public boolean hasMultiTermComponent() { + return multiTermComponent != null; + } + + private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {} + + @Override + protected TokenizerFactory create(Version version) { + if (multiTermComponent != null) { + return new MultiTermAwareTokenizerFactory() { + @Override + public String name() { + return getName(); + } + + @Override + public Tokenizer create() { + return create.apply(version); + } + + @Override + public Object getMultiTermComponent() { + return multiTermComponent.apply(version); + } + }; + } else { + return new TokenizerFactory() { + @Override + public String name() { + return getName(); + } + + @Override + public Tokenizer create() { + return create.apply(version); + } + }; + } + } +} diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index d49edb33eb354..72ebbd2c90449 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -104,6 +104,7 @@ import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory; import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.ReverseTokenFilterFactory; import org.elasticsearch.index.analysis.RomanianAnalyzerProvider; import org.elasticsearch.index.analysis.RussianAnalyzerProvider; @@ -141,7 +142,6 @@ import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory; import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory; import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory; -import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy; import org.elasticsearch.plugins.AnalysisPlugin; import java.io.IOException; @@ -178,9 +178,10 @@ public AnalysisModule(Environment environment, List plugins) thr NamedRegistry>> normalizers = setupNormalizers(plugins); Map preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins); + Map preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins); analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers - .getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredTokenFilters); + .getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredTokenFilters, preConfiguredTokenizers); } HunspellService getHunspellService() { @@ -287,6 +288,37 @@ static Map setupPreConfiguredTokenFilters(List return unmodifiableMap(preConfiguredTokenFilters.getRegistry()); } + static Map setupPreConfiguredTokenizers(List plugins) { + NamedRegistry preConfiguredTokenizers = new NamedRegistry<>("pre-configured tokenizer"); + + // Temporary shim to register old style pre-configured tokenizers + for (PreBuiltTokenizers tokenizer : PreBuiltTokenizers.values()) { + String name = tokenizer.name().toLowerCase(Locale.ROOT); + PreConfiguredTokenizer preConfigured; + switch (tokenizer.getCachingStrategy()) { + case ONE: + preConfigured = PreConfiguredTokenizer.singleton(name, + () -> tokenizer.create(Version.CURRENT), null); + break; + default: + throw new UnsupportedOperationException( + "Caching strategy unsupported by temporary shim [" + tokenizer + "]"); + } + preConfiguredTokenizers.register(name, preConfigured); + } + // Temporary shim for aliases. TODO deprecate after they are moved + preConfiguredTokenizers.register("nGram", preConfiguredTokenizers.getRegistry().get("ngram")); + preConfiguredTokenizers.register("edgeNGram", preConfiguredTokenizers.getRegistry().get("edge_ngram")); + preConfiguredTokenizers.register("PathHierarchy", preConfiguredTokenizers.getRegistry().get("path_hierarchy")); + + for (AnalysisPlugin plugin: plugins) { + for (PreConfiguredTokenizer tokenizer : plugin.getPreConfiguredTokenizer()) { + preConfiguredTokenizers.register(tokenizer.getName(), tokenizer); + } + } + return unmodifiableMap(preConfiguredTokenizers.getRegistry()); + } + private NamedRegistry> setupTokenizers(List plugins) { NamedRegistry> tokenizers = new NamedRegistry<>("tokenizer"); tokenizers.register("standard", StandardTokenizerFactory::new); diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java index a9869b56bc3c6..52e7ff6c9c4fa 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenizers.java @@ -21,7 +21,6 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.core.KeywordTokenizer; import org.apache.lucene.analysis.core.LetterTokenizer; -import org.apache.lucene.analysis.core.LowerCaseTokenizer; import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; import org.apache.lucene.analysis.ngram.NGramTokenizer; @@ -33,6 +32,7 @@ import org.apache.lucene.analysis.th.ThaiTokenizer; import org.elasticsearch.Version; import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.index.analysis.CustomNormalizerProvider; import org.elasticsearch.index.analysis.MultiTermAwareComponent; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; @@ -42,21 +42,21 @@ public enum PreBuiltTokenizers { - STANDARD(CachingStrategy.LUCENE) { + STANDARD(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new StandardTokenizer(); } }, - CLASSIC(CachingStrategy.LUCENE) { + CLASSIC(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new ClassicTokenizer(); } }, - UAX_URL_EMAIL(CachingStrategy.LUCENE) { + UAX_URL_EMAIL(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new UAX29URLEmailTokenizer(); @@ -77,39 +77,28 @@ protected Tokenizer create(Version version) { } }, - LETTER(CachingStrategy.LUCENE) { + LETTER(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new LetterTokenizer(); } }, - LOWERCASE(CachingStrategy.LUCENE) { - @Override - protected Tokenizer create(Version version) { - return new LowerCaseTokenizer(); - } - @Override - protected TokenFilterFactory getMultiTermComponent(Version version) { - return PreBuiltTokenFilters.LOWERCASE.getTokenFilterFactory(version); - } - }, - - WHITESPACE(CachingStrategy.LUCENE) { + WHITESPACE(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new WhitespaceTokenizer(); } }, - NGRAM(CachingStrategy.LUCENE) { + NGRAM(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new NGramTokenizer(); } }, - EDGE_NGRAM(CachingStrategy.LUCENE) { + EDGE_NGRAM(CachingStrategy.ONE) { @Override protected Tokenizer create(Version version) { return new EdgeNGramTokenizer(EdgeNGramTokenizer.DEFAULT_MIN_GRAM_SIZE, EdgeNGramTokenizer.DEFAULT_MAX_GRAM_SIZE); @@ -139,64 +128,60 @@ protected TokenFilterFactory getMultiTermComponent(Version version) { } protected final PreBuiltCacheFactory.PreBuiltCache cache; - + private final CachingStrategy cachingStrategy; PreBuiltTokenizers(CachingStrategy cachingStrategy) { + this.cachingStrategy = cachingStrategy; cache = PreBuiltCacheFactory.getCache(cachingStrategy); } - private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {} - - public synchronized TokenizerFactory getTokenizerFactory(final Version version) { - TokenizerFactory tokenizerFactory = cache.get(version); - if (tokenizerFactory == null) { - final String finalName = name().toLowerCase(Locale.ROOT); - if (getMultiTermComponent(version) != null) { - tokenizerFactory = new MultiTermAwareTokenizerFactory() { - @Override - public String name() { - return finalName; - } - - @Override - public Tokenizer create() { - return PreBuiltTokenizers.this.create(version); - } - - @Override - public Object getMultiTermComponent() { - return PreBuiltTokenizers.this.getMultiTermComponent(version); - } - }; - } else { - tokenizerFactory = new TokenizerFactory() { - @Override - public String name() { - return finalName; - } - - @Override - public Tokenizer create() { - return PreBuiltTokenizers.this.create(version); - } - }; - } - cache.put(version, tokenizerFactory); - } - - return tokenizerFactory; + public CachingStrategy getCachingStrategy() { + return cachingStrategy; } + private interface MultiTermAwareTokenizerFactory extends TokenizerFactory, MultiTermAwareComponent {} + /** - * Get a pre built Tokenizer by its name or fallback to the default one - * @param name Tokenizer name - * @param defaultTokenizer default Tokenizer if name not found + * Old style resolution for {@link TokenizerFactory}. Exists entirely to keep + * {@link CustomNormalizerProvider#build(java.util.Map, java.util.Map)} working during the migration. */ - public static PreBuiltTokenizers getOrDefault(String name, PreBuiltTokenizers defaultTokenizer) { - try { - return valueOf(name.toUpperCase(Locale.ROOT)); - } catch (IllegalArgumentException e) { - return defaultTokenizer; + public synchronized TokenizerFactory getTokenizerFactory(final Version version) { + TokenizerFactory tokenizerFactory = cache.get(version); + if (tokenizerFactory == null) { + final String finalName = name().toLowerCase(Locale.ROOT); + if (getMultiTermComponent(version) != null) { + tokenizerFactory = new MultiTermAwareTokenizerFactory() { + @Override + public String name() { + return finalName; + } + + @Override + public Tokenizer create() { + return PreBuiltTokenizers.this.create(version); + } + + @Override + public Object getMultiTermComponent() { + return PreBuiltTokenizers.this.getMultiTermComponent(version); + } + }; + } else { + tokenizerFactory = new TokenizerFactory() { + @Override + public String name() { + return finalName; + } + + @Override + public Tokenizer create() { + return PreBuiltTokenizers.this.create(version); + } + }; + } + cache.put(version, tokenizerFactory); + } + + return tokenizerFactory; } - } } diff --git a/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java b/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java index c248c706f2321..400f20ff39d36 100644 --- a/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java +++ b/core/src/main/java/org/elasticsearch/plugins/AnalysisPlugin.java @@ -22,24 +22,21 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.CharFilter; import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.elasticsearch.Version; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.AnalyzerProvider; import org.elasticsearch.index.analysis.CharFilterFactory; +import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.index.analysis.TokenizerFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; -import org.elasticsearch.indices.analysis.PreBuiltCacheFactory; import java.io.IOException; import java.util.List; import java.util.Map; -import java.util.function.BiFunction; import static java.util.Collections.emptyList; import static java.util.Collections.emptyMap; @@ -95,12 +92,19 @@ default Map>> getA } /** - * Override to add additional pre-configured token filters. + * Override to add additional pre-configured {@link TokenFilter}s. */ default List getPreConfiguredTokenFilters() { return emptyList(); } + /** + * Override to add additional pre-configured {@link Tokenizer}. + */ + default List getPreConfiguredTokenizer() { + return emptyList(); + } + /** * Override to add additional hunspell {@link org.apache.lucene.analysis.hunspell.Dictionary}s. */ diff --git a/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java b/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java index e3df8c423c886..e2123c4358268 100644 --- a/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java +++ b/core/src/test/java/org/elasticsearch/index/IndexModuleTests.java @@ -124,7 +124,8 @@ public void setUp() throws Exception { indexSettings = IndexSettingsModule.newIndexSettings("foo", settings); index = indexSettings.getIndex(); environment = new Environment(settings); - emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()); + emptyAnalysisRegistry = new AnalysisRegistry(environment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), + emptyMap(), emptyMap()); threadPool = new TestThreadPool("test"); circuitBreakerService = new NoneCircuitBreakerService(); bigArrays = new BigArrays(settings, circuitBreakerService); diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index 033296676275d..2596e7b2cadae 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -59,13 +59,17 @@ private static AnalyzerProvider analyzerProvider(final String name) { return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDEX, new EnglishAnalyzer()); } + private static AnalysisRegistry emptyAnalysisRegistry(Settings settings) { + return new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), + emptyMap()); + } + @Override public void setUp() throws Exception { super.setUp(); - emptyEnvironment = new Environment(Settings.builder() + emptyRegistry = emptyAnalysisRegistry(Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build()); - emptyRegistry = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()); emptyIndexSettingsOfCurrentVersion = IndexSettingsModule.newIndexSettings("index", Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .build()); @@ -191,12 +195,8 @@ public void testBuiltInAnalyzersAreCached() throws IOException { Settings indexSettings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); - IndexAnalyzers indexAnalyzers = - new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()) - .build(idxSettings); - IndexAnalyzers otherIndexAnalyzers = - new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()) - .build(idxSettings); + IndexAnalyzers indexAnalyzers = emptyAnalysisRegistry(settings).build(idxSettings); + IndexAnalyzers otherIndexAnalyzers = emptyAnalysisRegistry(settings).build(idxSettings); final int numIters = randomIntBetween(5, 20); for (int i = 0; i < numIters; i++) { PreBuiltAnalyzers preBuiltAnalyzers = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values()); @@ -213,7 +213,7 @@ public void testPreConfiguredTokenFiltersAreCached() throws IOException { return new MockTokenFilter(tokenStream, MockTokenFilter.EMPTY_STOPSET); }); try (AnalysisRegistry registryWithPreBuiltTokenFilter = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(), - emptyMap(), emptyMap(), singletonMap("asserts_built_once", assertsBuiltOnce))) { + emptyMap(), emptyMap(), singletonMap("asserts_built_once", assertsBuiltOnce), emptyMap())) { IndexAnalyzers indexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion); IndexAnalyzers otherIndexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion); assertSame(indexAnalyzers.get("asserts_built_once"), otherIndexAnalyzers.get("asserts_built_once")); @@ -231,9 +231,7 @@ public void testNoTypeOrTokenizerErrorMessage() throws IOException { .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); - IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> - new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()) - .build(idxSettings)); + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> emptyAnalysisRegistry(settings).build(idxSettings)); assertThat(e.getMessage(), equalTo("analyzer [test_analyzer] must specify either an analyzer type, or a tokenizer")); } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 290b09edc1d6e..0c7f8888d5f88 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -20,7 +20,9 @@ package org.elasticsearch.analysis.common; import org.apache.lucene.analysis.CharArraySet; +import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.StopFilter; +import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ar.ArabicNormalizationFilter; import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.br.BrazilianStemFilter; @@ -29,6 +31,7 @@ import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter; import org.apache.lucene.analysis.commongrams.CommonGramsFilter; import org.apache.lucene.analysis.core.DecimalDigitFilter; +import org.apache.lucene.analysis.core.LowerCaseTokenizer; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.UpperCaseFilter; import org.apache.lucene.analysis.cz.CzechStemFilter; @@ -66,6 +69,7 @@ import org.elasticsearch.index.analysis.HtmlStripCharFilterFactory; import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.TokenFilterFactory; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; @@ -174,4 +178,21 @@ public List getPreConfiguredTokenFilters() { | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE, null))); return filters; } + + @Override + public List getPreConfiguredTokenizer() { + List tokenizers = new ArrayList<>(); + tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", LowerCaseTokenizer::new, () -> new TokenFilterFactory() { + @Override + public String name() { + return "lowercase"; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new LowerCaseFilter(tokenStream); + } + })); + return tokenizers; + } } diff --git a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java index d250540645703..3ce7fd1d301b9 100644 --- a/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java +++ b/modules/analysis-common/src/test/java/org/elasticsearch/analysis/common/CommonAnalysisFactoryTests.java @@ -117,6 +117,13 @@ protected Map> getPreConfiguredTokenFilters() { return filters; } + @Override + protected Map> getPreConfiguredTokenizers() { + Map> filters = new TreeMap<>(super.getPreConfiguredTokenFilters()); + + return filters; + } + /** * Fails if a tokenizer is marked in the superclass with {@link MovedToAnalysisCommon} but * hasn't been marked in this class with its proper factory. diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index 040d2fb2dc6f3..f2d69ea2d0d72 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -66,6 +66,7 @@ import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory; import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.ReverseTokenFilterFactory; import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory; import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory; @@ -95,6 +96,7 @@ import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; +import java.util.Locale; import java.util.Map; import java.util.Objects; import java.util.Set; @@ -103,6 +105,7 @@ import java.util.regex.Pattern; import static java.util.Collections.singletonList; +import static org.hamcrest.Matchers.typeCompatibleWith; /** * Alerts us if new analysis components are added to Lucene, so we don't miss them. @@ -148,26 +151,6 @@ private static String toCamelCase(String s) { .put("simplepatternsplit", Void.class) .immutableMap(); - static final Map> PREBUILT_TOKENIZERS; - static { - PREBUILT_TOKENIZERS = new EnumMap<>(PreBuiltTokenizers.class); - for (PreBuiltTokenizers tokenizer : PreBuiltTokenizers.values()) { - Class luceneFactoryClazz; - switch (tokenizer) { - case UAX_URL_EMAIL: - luceneFactoryClazz = org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class; - break; - case PATH_HIERARCHY: - luceneFactoryClazz = Void.class; - break; - default: - luceneFactoryClazz = org.apache.lucene.analysis.util.TokenizerFactory.lookupClass( - toCamelCase(tokenizer.getTokenizerFactory(Version.CURRENT).name())); - } - PREBUILT_TOKENIZERS.put(tokenizer, luceneFactoryClazz); - } - } - static final Map> KNOWN_TOKENFILTERS = new MapBuilder>() // exposed in ES .put("apostrophe", ApostropheFilterFactory.class) @@ -319,22 +302,26 @@ public AnalysisFactoryTestCase(AnalysisPlugin plugin) { this.plugin = Objects.requireNonNull(plugin, "plugin is required. use an empty plugin for core"); } - protected Map> getTokenizers() { - return KNOWN_TOKENIZERS; + protected Map> getCharFilters() { + return KNOWN_CHARFILTERS; } protected Map> getTokenFilters() { return KNOWN_TOKENFILTERS; } + protected Map> getTokenizers() { + return KNOWN_TOKENIZERS; + } + /** * Map containing pre-configured token filters that should be available * after installing this plugin. The map is from the name of the token * filter to the class of the Lucene {@link TokenFilterFactory} that it - * is emulating. If the Lucene filter factory is {@code null} then the - * test will look it up for you from the name. If there is no Lucene - * {@linkplain TokenFilterFactory} then the right hand side should - * be {@link Void}. + * is emulating. If the Lucene {@linkplain TokenFilterFactory} is + * {@code null} then the test will look it up for you from the name. If + * there is no Lucene {@linkplain TokenFilterFactory} then the right + * hand side should be {@link Void}. */ protected Map> getPreConfiguredTokenFilters() { Map> filters = new HashMap<>(); @@ -343,8 +330,33 @@ protected Map> getPreConfiguredTokenFilters() { return filters; } - protected Map> getCharFilters() { - return KNOWN_CHARFILTERS; + /** + * Map containing pre-configured tokenizers that should be available + * after installing this plugin. The map is from the name of the token + * filter to the class of the Lucene {@link TokenizerFactory} that it + * is emulating. If the Lucene {@linkplain TokenizerFactory} is + * {@code null} then the test will look it up for you from the name. + * If there is no Lucene {@linkplain TokenizerFactory} then the right + * hand side should be {@link Void}. + */ + protected Map> getPreConfiguredTokenizers() { + Map> tokenizers = new HashMap<>(); + // TODO drop this temporary shim when all the old style tokenizers have been migrated to new style + for (PreBuiltTokenizers tokenizer : PreBuiltTokenizers.values()) { + final Class luceneFactoryClazz; + switch (tokenizer) { + case UAX_URL_EMAIL: + luceneFactoryClazz = org.apache.lucene.analysis.standard.UAX29URLEmailTokenizerFactory.class; + break; + case PATH_HIERARCHY: + luceneFactoryClazz = Void.class; + break; + default: + luceneFactoryClazz = null; + } + tokenizers.put(tokenizer.name().toLowerCase(Locale.ROOT), luceneFactoryClazz); + } + return tokenizers; } public void testTokenizers() { @@ -421,22 +433,29 @@ public void testPreBuiltMultiTermAware() { Collection expected = new HashSet<>(); Collection actual = new HashSet<>(); - for (Map.Entry> entry : PREBUILT_TOKENIZERS.entrySet()) { - PreBuiltTokenizers tokenizer = entry.getKey(); + Map preConfiguredTokenFilters = + AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)); + for (Map.Entry> entry : getPreConfiguredTokenFilters().entrySet()) { + String name = entry.getKey(); Class luceneFactory = entry.getValue(); if (luceneFactory == Void.class) { continue; } - assertTrue(TokenizerFactory.class.isAssignableFrom(luceneFactory)); - if (tokenizer.getTokenizerFactory(Version.CURRENT) instanceof MultiTermAwareComponent) { - actual.add(tokenizer); + if (luceneFactory == null) { + luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name)); + } + assertThat(luceneFactory, typeCompatibleWith(TokenFilterFactory.class)); + PreConfiguredTokenFilter filter = preConfiguredTokenFilters.get(name); + assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter); + if (filter.shouldUseFilterForMultitermQueries()) { + actual.add("token filter [" + name + "]"); } if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) { - expected.add(tokenizer); + expected.add("token filter [" + name + "]"); } } - Map preBuiltTokenFilters = AnalysisModule.setupPreConfiguredTokenFilters(singletonList(plugin)); - for (Map.Entry> entry : getPreConfiguredTokenFilters().entrySet()) { + Map preConfiguredTokenizers = AnalysisModule.setupPreConfiguredTokenizers(singletonList(plugin)); + for (Map.Entry> entry : getPreConfiguredTokenizers().entrySet()) { String name = entry.getKey(); Class luceneFactory = entry.getValue(); if (luceneFactory == Void.class) { @@ -445,14 +464,13 @@ public void testPreBuiltMultiTermAware() { if (luceneFactory == null) { luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name)); } - assertTrue(TokenFilterFactory.class.isAssignableFrom(luceneFactory)); - PreConfiguredTokenFilter filter = preBuiltTokenFilters.get(name); - assertNotNull("test claims pre built token filter [" + name + "] should be available but it wasn't", filter); - if (filter.shouldUseFilterForMultitermQueries()) { - actual.add("token filter [" + name + "]"); + assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class)); + PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.get(name); + if (tokenizer.hasMultiTermComponent()) { + actual.add(tokenizer); } if (org.apache.lucene.analysis.util.MultiTermAwareComponent.class.isAssignableFrom(luceneFactory)) { - expected.add("token filter [" + name + "]"); + expected.add(tokenizer); } } for (Map.Entry> entry : PREBUILT_CHARFILTERS.entrySet()) { From ca29ac01d9bcee670a9686d8c6a2f7d8f0942be0 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 May 2017 10:42:47 -0400 Subject: [PATCH 2/6] Cleanup --- .../index/analysis/PreConfiguredAnalysisComponent.java | 2 +- .../elasticsearch/indices/analysis/AnalysisFactoryTestCase.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java index c5fa406297df3..3ce49d32951ac 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java @@ -60,5 +60,5 @@ public String getName() { return name; } - protected abstract T create(final Version version); + protected abstract T create(Version version); } diff --git a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java index f2d69ea2d0d72..f70a3a5ef9905 100644 --- a/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/indices/analysis/AnalysisFactoryTestCase.java @@ -462,7 +462,7 @@ public void testPreBuiltMultiTermAware() { continue; } if (luceneFactory == null) { - luceneFactory = TokenFilterFactory.lookupClass(toCamelCase(name)); + luceneFactory = TokenizerFactory.lookupClass(toCamelCase(name)); } assertThat(luceneFactory, typeCompatibleWith(TokenizerFactory.class)); PreConfiguredTokenizer tokenizer = preConfiguredTokenizers.get(name); From c3a2ddd14bc96363e1ec01f9db66bbaaed002983 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 May 2017 13:32:04 -0400 Subject: [PATCH 3/6] Drop useless test and add useful one one test wasn't really testing anything even though it looked like it was. It tried to test caching but it failed at it. I'm not actually sure what good the caching provides. Adds unit test for registering tokenizers via plugins. --- .../analysis/PreConfiguredTokenizer.java | 4 +- .../indices/analysis/AnalysisModule.java | 2 +- .../elasticsearch/plugins/AnalysisPlugin.java | 2 +- .../index/analysis/AnalysisRegistryTests.java | 31 +--- .../indices/analysis/AnalysisModuleTests.java | 149 +++++++++++++----- .../analysis/common/CommonAnalysisPlugin.java | 2 +- 6 files changed, 125 insertions(+), 65 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java index f2386a9a2732d..6d1842c7a36c9 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredTokenizer.java @@ -68,8 +68,8 @@ public static PreConfiguredTokenizer luceneVersion(String name, Function create, @Nullable Function multiTermComponent) { + public static PreConfiguredTokenizer elasticsearchVersion(String name, Function create, + @Nullable Function multiTermComponent) { return new PreConfiguredTokenizer(name, CachingStrategy.ELASTICSEARCH, create, multiTermComponent); } diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 72ebbd2c90449..4dd146599c9df 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -312,7 +312,7 @@ static Map setupPreConfiguredTokenizers(List getPreConfiguredTokenFilters() { /** * Override to add additional pre-configured {@link Tokenizer}. */ - default List getPreConfiguredTokenizer() { + default List getPreConfiguredTokenizers() { return emptyList(); } diff --git a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java index 2596e7b2cadae..57ef842072acf 100644 --- a/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java +++ b/core/src/test/java/org/elasticsearch/index/analysis/AnalysisRegistryTests.java @@ -41,7 +41,6 @@ import java.io.IOException; import java.util.Map; -import java.util.concurrent.atomic.AtomicBoolean; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; @@ -50,10 +49,7 @@ import static org.hamcrest.Matchers.instanceOf; public class AnalysisRegistryTests extends ESTestCase { - - private Environment emptyEnvironment; private AnalysisRegistry emptyRegistry; - private IndexSettings emptyIndexSettingsOfCurrentVersion; private static AnalyzerProvider analyzerProvider(final String name) { return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDEX, new EnglishAnalyzer()); @@ -64,15 +60,18 @@ private static AnalysisRegistry emptyAnalysisRegistry(Settings settings) { emptyMap()); } + private static IndexSettings indexSettingsOfCurrentVersion(Settings.Builder settings) { + return IndexSettingsModule.newIndexSettings("index", settings + .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) + .build()); + } + @Override public void setUp() throws Exception { super.setUp(); emptyRegistry = emptyAnalysisRegistry(Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build()); - emptyIndexSettingsOfCurrentVersion = IndexSettingsModule.newIndexSettings("index", Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .build()); } public void testDefaultAnalyzers() throws IOException { @@ -204,22 +203,6 @@ public void testBuiltInAnalyzersAreCached() throws IOException { } } - public void testPreConfiguredTokenFiltersAreCached() throws IOException { - AtomicBoolean built = new AtomicBoolean(false); - PreConfiguredTokenFilter assertsBuiltOnce = PreConfiguredTokenFilter.singleton("asserts_built_once", false, tokenStream -> { - if (false == built.compareAndSet(false, true)) { - fail("Attempted to build the token filter twice when it should have been cached"); - } - return new MockTokenFilter(tokenStream, MockTokenFilter.EMPTY_STOPSET); - }); - try (AnalysisRegistry registryWithPreBuiltTokenFilter = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(), - emptyMap(), emptyMap(), singletonMap("asserts_built_once", assertsBuiltOnce), emptyMap())) { - IndexAnalyzers indexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion); - IndexAnalyzers otherIndexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion); - assertSame(indexAnalyzers.get("asserts_built_once"), otherIndexAnalyzers.get("asserts_built_once")); - } - } - public void testNoTypeOrTokenizerErrorMessage() throws IOException { Version version = VersionUtils.randomVersion(random()); Settings settings = Settings @@ -236,7 +219,7 @@ public void testNoTypeOrTokenizerErrorMessage() throws IOException { } public void testCloseIndexAnalyzersMultipleTimes() throws IOException { - IndexAnalyzers indexAnalyzers = emptyRegistry.build(emptyIndexSettingsOfCurrentVersion); + IndexAnalyzers indexAnalyzers = emptyRegistry.build(indexSettingsOfCurrentVersion(Settings.builder())); indexAnalyzers.close(); indexAnalyzers.close(); } diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 298c8938dd2ef..3a43d0b45b476 100644 --- a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -23,7 +23,6 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.hunspell.Dictionary; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -37,12 +36,12 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.analysis.Analysis; import org.elasticsearch.index.analysis.AnalysisRegistry; -import org.elasticsearch.index.analysis.AnalysisTestsHelper; import org.elasticsearch.index.analysis.CharFilterFactory; import org.elasticsearch.index.analysis.CustomAnalyzer; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; import org.elasticsearch.index.analysis.PreConfiguredTokenFilter; +import org.elasticsearch.index.analysis.PreConfiguredTokenizer; import org.elasticsearch.index.analysis.StandardTokenizerFactory; import org.elasticsearch.index.analysis.StopTokenFilterFactory; import org.elasticsearch.index.analysis.TokenFilterFactory; @@ -57,7 +56,6 @@ import java.io.BufferedWriter; import java.io.IOException; import java.io.InputStream; -import java.io.StringReader; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; @@ -164,18 +162,6 @@ public void testVersionedAnalyzers() throws Exception { assertEquals(org.apache.lucene.util.Version.fromBits(3,6,0), indexAnalyzers.get("custom7").analyzer().getVersion()); } - private void assertTokenFilter(String name, Class clazz) throws IOException { - Settings settings = Settings.builder() - .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); - TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings); - TokenFilterFactory tokenFilter = analysis.tokenFilter.get(name); - Tokenizer tokenizer = new WhitespaceTokenizer(); - tokenizer.setReader(new StringReader("foo bar")); - TokenStream stream = tokenFilter.create(tokenizer); - assertThat(stream, instanceOf(clazz)); - } - private void testSimpleConfiguration(Settings settings) throws IOException { IndexAnalyzers indexAnalyzers = getIndexAnalyzers(settings); Analyzer analyzer = indexAnalyzers.get("custom1").analyzer(); @@ -269,27 +255,6 @@ public void testUnderscoreInAnalyzerName() throws IOException { * and that do not vary based on version at all. */ public void testPluginPreConfiguredTokenFilters() throws IOException { - // Simple token filter that appends text to the term - final class AppendTokenFilter extends TokenFilter { - private final CharTermAttribute term = addAttribute(CharTermAttribute.class); - private final char[] appendMe; - - protected AppendTokenFilter(TokenStream input, String appendMe) { - super(input); - this.appendMe = appendMe.toCharArray(); - } - - @Override - public boolean incrementToken() throws IOException { - if (false == input.incrementToken()) { - return false; - } - term.resizeBuffer(term.length() + appendMe.length); - System.arraycopy(appendMe, 0, term.buffer(), term.length(), appendMe.length); - term.setLength(term.length() + appendMe.length); - return true; - } - } boolean noVersionSupportsMultiTerm = randomBoolean(); boolean luceneVersionSupportsMultiTerm = randomBoolean(); boolean elasticsearchVersionSupportsMultiTerm = randomBoolean(); @@ -329,6 +294,81 @@ public List getPreConfiguredTokenFilters() { analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString()); } + /** + * Tests that plugins can register pre-configured token filters that vary in behavior based on Elasticsearch version, Lucene version, + * and that do not vary based on version at all. + */ + public void testPluginPreConfiguredTokenizers() throws IOException { + boolean noVersionSupportsMultiTerm = randomBoolean(); + boolean luceneVersionSupportsMultiTerm = randomBoolean(); + boolean elasticsearchVersionSupportsMultiTerm = randomBoolean(); + + // Simple tokenizer that always spits out a single token with some preconfigured characters + final class FixedTokenizer extends Tokenizer { + private final CharTermAttribute term = addAttribute(CharTermAttribute.class); + private final char[] chars; + private boolean read = false; + + protected FixedTokenizer(String chars) { + this.chars = chars.toCharArray(); + } + + @Override + public boolean incrementToken() throws IOException { + if (read) { + return false; + } + clearAttributes(); + read = true; + term.resizeBuffer(chars.length); + System.arraycopy(chars, 0, term.buffer(), 0, chars.length); + term.setLength(chars.length); + return true; + } + + @Override + public void reset() throws IOException { + super.reset(); + read = false; + } + } + AnalysisRegistry registry = new AnalysisModule(new Environment(emptyNodeSettings), singletonList(new AnalysisPlugin() { + @Override + public List getPreConfiguredTokenizers() { + return Arrays.asList( + PreConfiguredTokenizer.singleton("no_version", () -> new FixedTokenizer("no_version"), + noVersionSupportsMultiTerm ? () -> AppendTokenFilter.factoryForSuffix("no_version") : null), + PreConfiguredTokenizer.luceneVersion("lucene_version", + luceneVersion -> new FixedTokenizer(luceneVersion.toString()), + noVersionSupportsMultiTerm ? + luceneVersion -> AppendTokenFilter.factoryForSuffix(luceneVersion.toString()) : null), + PreConfiguredTokenizer.elasticsearchVersion("elasticsearch_version", + esVersion -> new FixedTokenizer(esVersion.toString()), + noVersionSupportsMultiTerm ? + esVersion -> AppendTokenFilter.factoryForSuffix(esVersion.toString()) : null) + ); + } + })).getAnalysisRegistry(); + + Version version = VersionUtils.randomVersion(random()); + IndexAnalyzers analyzers = getIndexAnalyzers(registry, Settings.builder() + .put("index.analysis.analyzer.no_version.tokenizer", "no_version") + .put("index.analysis.analyzer.lucene_version.tokenizer", "lucene_version") + .put("index.analysis.analyzer.elasticsearch_version.tokenizer", "elasticsearch_version") + .put(IndexMetaData.SETTING_VERSION_CREATED, version) + .build()); + assertTokenStreamContents(analyzers.get("no_version").tokenStream("", "test"), new String[] {"no_version"}); + assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] {version.luceneVersion.toString()}); + assertTokenStreamContents(analyzers.get("elasticsearch_version").tokenStream("", "test"), new String[] {version.toString()}); + + assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""), + analyzers.get("no_version").normalize("", "test").utf8ToString()); + assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""), + analyzers.get("lucene_version").normalize("", "test").utf8ToString()); + assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""), + analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString()); + } + public void testRegisterHunspellDictionary() throws Exception { Settings settings = Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) @@ -349,4 +389,41 @@ public Map getHunspellDictionaries() { })); assertSame(dictionary, module.getHunspellService().getDictionary("foo")); } + + // Simple token filter that appends text to the term + private static class AppendTokenFilter extends TokenFilter { + public static TokenFilterFactory factoryForSuffix(String suffix) { + return new TokenFilterFactory() { + @Override + public String name() { + return suffix; + } + + @Override + public TokenStream create(TokenStream tokenStream) { + return new AppendTokenFilter(tokenStream, suffix); + } + }; + } + + private final CharTermAttribute term = addAttribute(CharTermAttribute.class); + private final char[] appendMe; + + protected AppendTokenFilter(TokenStream input, String appendMe) { + super(input); + this.appendMe = appendMe.toCharArray(); + } + + @Override + public boolean incrementToken() throws IOException { + if (false == input.incrementToken()) { + return false; + } + term.resizeBuffer(term.length() + appendMe.length); + System.arraycopy(appendMe, 0, term.buffer(), term.length(), appendMe.length); + term.setLength(term.length() + appendMe.length); + return true; + } + } + } diff --git a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java index 0c7f8888d5f88..fcca4f7eddff0 100644 --- a/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java +++ b/modules/analysis-common/src/main/java/org/elasticsearch/analysis/common/CommonAnalysisPlugin.java @@ -180,7 +180,7 @@ public List getPreConfiguredTokenFilters() { } @Override - public List getPreConfiguredTokenizer() { + public List getPreConfiguredTokenizers() { List tokenizers = new ArrayList<>(); tokenizers.add(PreConfiguredTokenizer.singleton("lowercase", LowerCaseTokenizer::new, () -> new TokenFilterFactory() { @Override From d14ef8ec57110a89c26aa10b8ec6bfce94954968 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Wed, 17 May 2017 14:26:47 -0400 Subject: [PATCH 4/6] tmp --- .../indices/analysis/AnalysisModuleTests.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 3a43d0b45b476..5b4bfee4c9252 100644 --- a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -19,6 +19,8 @@ package org.elasticsearch.indices.analysis; +import com.carrotsearch.randomizedtesting.annotations.Repeat; + import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -298,6 +300,7 @@ public List getPreConfiguredTokenFilters() { * Tests that plugins can register pre-configured token filters that vary in behavior based on Elasticsearch version, Lucene version, * and that do not vary based on version at all. */ + @Repeat(iterations = 100) public void testPluginPreConfiguredTokenizers() throws IOException { boolean noVersionSupportsMultiTerm = randomBoolean(); boolean luceneVersionSupportsMultiTerm = randomBoolean(); @@ -340,11 +343,11 @@ public List getPreConfiguredTokenizers() { noVersionSupportsMultiTerm ? () -> AppendTokenFilter.factoryForSuffix("no_version") : null), PreConfiguredTokenizer.luceneVersion("lucene_version", luceneVersion -> new FixedTokenizer(luceneVersion.toString()), - noVersionSupportsMultiTerm ? + luceneVersionSupportsMultiTerm ? luceneVersion -> AppendTokenFilter.factoryForSuffix(luceneVersion.toString()) : null), PreConfiguredTokenizer.elasticsearchVersion("elasticsearch_version", esVersion -> new FixedTokenizer(esVersion.toString()), - noVersionSupportsMultiTerm ? + elasticsearchVersionSupportsMultiTerm ? esVersion -> AppendTokenFilter.factoryForSuffix(esVersion.toString()) : null) ); } From f01f9caa1573121fc003141ad84c4b6974205b8f Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 19 May 2017 10:20:49 -0400 Subject: [PATCH 5/6] Drop test for broken thing We have an issue tracking it. --- .../indices/analysis/AnalysisModuleTests.java | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java index 5b4bfee4c9252..f94c0c8fe746d 100644 --- a/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java +++ b/core/src/test/java/org/elasticsearch/indices/analysis/AnalysisModuleTests.java @@ -19,8 +19,6 @@ package org.elasticsearch.indices.analysis; -import com.carrotsearch.randomizedtesting.annotations.Repeat; - import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; @@ -300,7 +298,6 @@ public List getPreConfiguredTokenFilters() { * Tests that plugins can register pre-configured token filters that vary in behavior based on Elasticsearch version, Lucene version, * and that do not vary based on version at all. */ - @Repeat(iterations = 100) public void testPluginPreConfiguredTokenizers() throws IOException { boolean noVersionSupportsMultiTerm = randomBoolean(); boolean luceneVersionSupportsMultiTerm = randomBoolean(); @@ -364,12 +361,13 @@ public List getPreConfiguredTokenizers() { assertTokenStreamContents(analyzers.get("lucene_version").tokenStream("", "test"), new String[] {version.luceneVersion.toString()}); assertTokenStreamContents(analyzers.get("elasticsearch_version").tokenStream("", "test"), new String[] {version.toString()}); - assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""), - analyzers.get("no_version").normalize("", "test").utf8ToString()); - assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""), - analyzers.get("lucene_version").normalize("", "test").utf8ToString()); - assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""), - analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString()); + // These are current broken by https://github.com/elastic/elasticsearch/issues/24752 +// assertEquals("test" + (noVersionSupportsMultiTerm ? "no_version" : ""), +// analyzers.get("no_version").normalize("", "test").utf8ToString()); +// assertEquals("test" + (luceneVersionSupportsMultiTerm ? version.luceneVersion.toString() : ""), +// analyzers.get("lucene_version").normalize("", "test").utf8ToString()); +// assertEquals("test" + (elasticsearchVersionSupportsMultiTerm ? version.toString() : ""), +// analyzers.get("elasticsearch_version").normalize("", "test").utf8ToString()); } public void testRegisterHunspellDictionary() throws Exception { From f2637dbc9a375df31537b97de3acb4d99e242277 Mon Sep 17 00:00:00 2001 From: Nik Everett Date: Fri, 19 May 2017 10:22:18 -0400 Subject: [PATCH 6/6] Public --- .../index/analysis/PreConfiguredAnalysisComponent.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java index 3ce49d32951ac..fdd525d0c80dd 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/PreConfiguredAnalysisComponent.java @@ -31,7 +31,7 @@ /** * Shared implementation for pre-configured analysis components. */ -abstract class PreConfiguredAnalysisComponent implements AnalysisModule.AnalysisProvider { +public abstract class PreConfiguredAnalysisComponent implements AnalysisModule.AnalysisProvider { private final String name; private final PreBuiltCacheFactory.PreBuiltCache cache;