Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,6 @@
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.indices.analysis.PreBuiltAnalyzers;
import org.elasticsearch.indices.analysis.PreBuiltCharFilters;
import org.elasticsearch.indices.analysis.PreBuiltTokenizers;

import java.io.Closeable;
import java.io.IOException;
Expand Down Expand Up @@ -74,6 +72,7 @@ public AnalysisRegistry(Environment environment,
Map<String, AnalysisProvider<TokenizerFactory>> tokenizers,
Map<String, AnalysisProvider<AnalyzerProvider<?>>> analyzers,
Map<String, AnalysisProvider<AnalyzerProvider<?>>> normalizers,
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
this.environment = environment;
Expand All @@ -82,7 +81,7 @@ public AnalysisRegistry(Environment environment,
this.tokenizers = unmodifiableMap(tokenizers);
this.analyzers = unmodifiableMap(analyzers);
this.normalizers = unmodifiableMap(normalizers);
prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredTokenFilters, preConfiguredTokenizers);
prebuiltAnalysis = new PrebuiltAnalysis(preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
}

/**
Expand Down Expand Up @@ -180,7 +179,7 @@ public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings index

public Map<String, CharFilterFactory> buildCharFilterFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> charFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_CHAR_FILTER);
return buildMapping(Component.CHAR_FILTER, indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.charFilterFactories);
return buildMapping(Component.CHAR_FILTER, indexSettings, charFiltersSettings, charFilters, prebuiltAnalysis.preConfiguredCharFilterFactories);
}

public Map<String, AnalyzerProvider<?>> buildAnalyzerFactories(IndexSettings indexSettings) throws IOException {
Expand Down Expand Up @@ -397,36 +396,28 @@ private static class PrebuiltAnalysis implements Closeable {
final Map<String, AnalysisModule.AnalysisProvider<AnalyzerProvider<?>>> analyzerProviderFactories;
final Map<String, ? extends AnalysisProvider<TokenFilterFactory>> preConfiguredTokenFilters;
final Map<String, ? extends AnalysisProvider<TokenizerFactory>> preConfiguredTokenizers;
final Map<String, AnalysisModule.AnalysisProvider<CharFilterFactory>> charFilterFactories;
final Map<String, ? extends AnalysisProvider<CharFilterFactory>> preConfiguredCharFilterFactories;

private PrebuiltAnalysis(
Map<String, PreConfiguredCharFilter> preConfiguredCharFilters,
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters,
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers) {
Map<String, PreBuiltAnalyzerProviderFactory> analyzerProviderFactories = new HashMap<>();
Map<String, PreBuiltCharFilterFactoryFactory> charFilterFactories = new HashMap<>();

// Analyzers
for (PreBuiltAnalyzers preBuiltAnalyzerEnum : PreBuiltAnalyzers.values()) {
String name = preBuiltAnalyzerEnum.name().toLowerCase(Locale.ROOT);
analyzerProviderFactories.put(name, new PreBuiltAnalyzerProviderFactory(name, AnalyzerScope.INDICES, preBuiltAnalyzerEnum.getAnalyzer(Version.CURRENT)));
}

// Char Filters
for (PreBuiltCharFilters preBuiltCharFilter : PreBuiltCharFilters.values()) {
String name = preBuiltCharFilter.name().toLowerCase(Locale.ROOT);
charFilterFactories.put(name, new PreBuiltCharFilterFactoryFactory(preBuiltCharFilter.getCharFilterFactory(Version.CURRENT)));
}
// Char filter aliases
charFilterFactories.put("htmlStrip", new PreBuiltCharFilterFactoryFactory(PreBuiltCharFilters.HTML_STRIP.getCharFilterFactory(Version.CURRENT)));

this.analyzerProviderFactories = Collections.unmodifiableMap(analyzerProviderFactories);
this.charFilterFactories = Collections.unmodifiableMap(charFilterFactories);
this.preConfiguredCharFilterFactories = preConfiguredCharFilters;
this.preConfiguredTokenFilters = preConfiguredTokenFilters;
this.preConfiguredTokenizers = preConfiguredTokenizers;
}

public AnalysisModule.AnalysisProvider<CharFilterFactory> getCharFilterFactory(String name) {
return charFilterFactories.get(name);
return preConfiguredCharFilterFactories.get(name);
}

public AnalysisModule.AnalysisProvider<TokenFilterFactory> getTokenFilterFactory(String name) {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharFilter;
import org.apache.lucene.analysis.TokenFilter;
import org.elasticsearch.Version;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;

import java.io.Reader;
import java.util.function.BiFunction;
import java.util.function.Function;

/**
 * Provides pre-configured, shared {@link CharFilter}s.
 * <p>
 * Instances are obtained through the static factory methods, which differ only in the
 * {@link CachingStrategy} handed to the superclass and in which version (if any) is passed
 * on to the supplied function. This is the char-filter analogue of
 * {@code PreConfiguredTokenFilter}, which plays the same role for {@link TokenFilter}s.
 */
public class PreConfiguredCharFilter extends PreConfiguredAnalysisComponent<CharFilterFactory> {

    /** Marker type for factories that are also usable in multi-term queries. */
    private interface MultiTermAwareCharFilterFactory extends CharFilterFactory, MultiTermAwareComponent {}

    /** Whether the factories built by this instance are multi-term aware. */
    private final boolean useFilterForMultitermQueries;

    /** Wraps a {@link Reader} for a given Elasticsearch version. */
    private final BiFunction<Reader, Version, Reader> readerWrapper;

    /**
     * Create a pre-configured char filter that may not vary at all.
     *
     * @param name the name passed to the superclass
     * @param useFilterForMultitermQueries whether the filter may be used in multi-term queries
     * @param create wraps the incoming reader; the version is ignored
     */
    public static PreConfiguredCharFilter singleton(String name, boolean useFilterForMultitermQueries, Function<Reader, Reader> create) {
        BiFunction<Reader, Version, Reader> versionAgnostic = (in, ignoredVersion) -> create.apply(in);
        return new PreConfiguredCharFilter(name, CachingStrategy.ONE, useFilterForMultitermQueries, versionAgnostic);
    }

    /**
     * Create a pre-configured char filter that may vary based on the Lucene version.
     *
     * @param name the name passed to the superclass
     * @param useFilterForMultitermQueries whether the filter may be used in multi-term queries
     * @param create wraps the incoming reader for the given Lucene version
     */
    public static PreConfiguredCharFilter luceneVersion(String name, boolean useFilterForMultitermQueries,
            BiFunction<Reader, org.apache.lucene.util.Version, Reader> create) {
        BiFunction<Reader, Version, Reader> byLuceneVersion = (in, esVersion) -> create.apply(in, esVersion.luceneVersion);
        return new PreConfiguredCharFilter(name, CachingStrategy.LUCENE, useFilterForMultitermQueries, byLuceneVersion);
    }

    /**
     * Create a pre-configured char filter that may vary based on the Elasticsearch version.
     *
     * @param name the name passed to the superclass
     * @param useFilterForMultitermQueries whether the filter may be used in multi-term queries
     * @param create wraps the incoming reader for the given Elasticsearch version
     */
    public static PreConfiguredCharFilter elasticsearchVersion(String name, boolean useFilterForMultitermQueries,
            BiFunction<Reader, org.elasticsearch.Version, Reader> create) {
        return new PreConfiguredCharFilter(name, CachingStrategy.ELASTICSEARCH, useFilterForMultitermQueries, create);
    }

    /**
     * @param name the name passed to the superclass
     * @param cache the caching strategy passed to the superclass
     * @param useFilterForMultitermQueries whether the filter may be used in multi-term queries
     * @param create wraps the incoming reader for the given Elasticsearch version
     */
    protected PreConfiguredCharFilter(String name, CachingStrategy cache, boolean useFilterForMultitermQueries,
            BiFunction<Reader, org.elasticsearch.Version, Reader> create) {
        super(name, cache);
        this.useFilterForMultitermQueries = useFilterForMultitermQueries;
        this.readerWrapper = create;
    }

    /**
     * Can this {@link CharFilter} be used in multi-term queries?
     */
    public boolean shouldUseFilterForMultitermQueries() {
        return useFilterForMultitermQueries;
    }

    /**
     * Builds the {@link CharFilterFactory} for {@code version}. The factory reports this
     * component's name and delegates reader wrapping to the function supplied at construction.
     * When multi-term use is enabled the factory additionally exposes itself as its own
     * multi-term component.
     */
    @Override
    protected CharFilterFactory create(Version version) {
        if (useFilterForMultitermQueries == false) {
            return new CharFilterFactory() {
                @Override
                public String name() {
                    return getName();
                }

                @Override
                public Reader create(Reader reader) {
                    return readerWrapper.apply(reader, version);
                }
            };
        }
        return new MultiTermAwareCharFilterFactory() {
            @Override
            public String name() {
                return getName();
            }

            @Override
            public Reader create(Reader reader) {
                return readerWrapper.apply(reader, version);
            }

            @Override
            public Object getMultiTermComponent() {
                return this;
            }
        };
    }

}
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
Expand Down Expand Up @@ -173,11 +174,14 @@ public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) thr
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins);

Map<String, PreConfiguredCharFilter> preConfiguredCharFilters = setupPreConfiguredCharFilters(plugins);
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
Map<String, PreConfiguredTokenizer> preConfiguredTokenizers = setupPreConfiguredTokenizers(plugins);

analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
.getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredTokenFilters, preConfiguredTokenizers);
analysisRegistry = new AnalysisRegistry(environment,
charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers.getRegistry(),
analyzers.getRegistry(), normalizers.getRegistry(),
preConfiguredCharFilters, preConfiguredTokenFilters, preConfiguredTokenizers);
}

HunspellService getHunspellService() {
Expand Down Expand Up @@ -261,6 +265,19 @@ private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(Li
return tokenFilters;
}

/**
 * Gathers the pre-configured char filters contributed by {@code plugins} into a single
 * unmodifiable map keyed by each filter's name.
 *
 * @param plugins the analysis plugins to query, in registration order
 * @return an unmodifiable view of the registered filters
 */
static Map<String, PreConfiguredCharFilter> setupPreConfiguredCharFilters(List<AnalysisPlugin> plugins) {
    NamedRegistry<PreConfiguredCharFilter> registry = new NamedRegistry<>("pre-configured char_filter");

    // No char filters are available in lucene-core, so none are built in to Elasticsearch core;
    // everything registered here comes from plugins.
    plugins.forEach(plugin ->
        plugin.getPreConfiguredCharFilters().forEach(charFilter ->
            registry.register(charFilter.getName(), charFilter)));

    return unmodifiableMap(registry.getRegistry());
}

static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
NamedRegistry<PreConfiguredTokenFilter> preConfiguredTokenFilters = new NamedRegistry<>("pre-configured token_filter");

Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.index.analysis.PreConfiguredCharFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.PreConfiguredTokenizer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
Expand Down Expand Up @@ -91,6 +92,13 @@ default Map<String, AnalysisProvider<AnalyzerProvider<? extends Analyzer>>> getA
return emptyMap();
}

/**
* Override to add additional pre-configured {@link CharFilter}s.
* <p>
* The default implementation contributes nothing: it returns the result of
* {@code emptyList()}.
*
* @return the pre-configured char filters this plugin provides
*/
default List<PreConfiguredCharFilter> getPreConfiguredCharFilters() {
return emptyList();
}

/**
* Override to add additional pre-configured {@link TokenFilter}s.
*/
Expand Down
Loading