Skip to content
Original file line number Diff line number Diff line change
Expand Up @@ -278,22 +278,6 @@ static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List
* version uses a set of English stop words that are in
* lucene-analyzers-common so "stop" is defined in the analysis-common
* module. */

// Register a shim for every token filter still declared in PreBuiltTokenFilters.
// This loop can go away once all of them have been migrated.
for (PreBuiltTokenFilters preBuilt : PreBuiltTokenFilters.values()) {
    if (preBuilt == PreBuiltTokenFilters.LOWERCASE) {
        // This has been migrated but has to stick around until PreBuiltTokenizers is removed.
        continue;
    }

    // The shim registers each filter through PreConfiguredTokenFilter.singleton,
    // so any caching strategy other than ONE is rejected up front.
    if (preBuilt.getCachingStrategy() != CachingStrategy.ONE) {
        throw new UnsupportedOperationException("shim not available for " + preBuilt.getCachingStrategy());
    }

    // Filters are registered under the lower-cased enum constant name.
    String name = preBuilt.name().toLowerCase(Locale.ROOT);
    preConfiguredTokenFilters.register(
        name,
        PreConfiguredTokenFilter.singleton(
            name,
            preBuilt.isMultiTermAware(),
            tokenStream -> preBuilt.create(tokenStream, Version.CURRENT)));
}

for (AnalysisPlugin plugin: plugins) {
for (PreConfiguredTokenFilter filter : plugin.getPreConfiguredTokenFilters()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,38 +20,10 @@

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.ar.ArabicStemFilter;
import org.apache.lucene.analysis.br.BrazilianStemFilter;
import org.apache.lucene.analysis.cjk.CJKBigramFilter;
import org.apache.lucene.analysis.cjk.CJKWidthFilter;
import org.apache.lucene.analysis.ckb.SoraniNormalizationFilter;
import org.apache.lucene.analysis.core.DecimalDigitFilter;
import org.apache.lucene.analysis.cz.CzechStemFilter;
import org.apache.lucene.analysis.de.GermanNormalizationFilter;
import org.apache.lucene.analysis.de.GermanStemFilter;
import org.apache.lucene.analysis.fa.PersianNormalizationFilter;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.hi.HindiNormalizationFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilter;
import org.apache.lucene.analysis.miscellaneous.LimitTokenCountFilter;
import org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.ScandinavianNormalizationFilter;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.tr.ApostropheFilter;
import org.apache.lucene.analysis.util.ElisionFilter;
import org.elasticsearch.Version;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.MultiTermAwareComponent;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.tartarus.snowball.ext.DutchStemmer;
import org.tartarus.snowball.ext.FrenchStemmer;

import java.util.Locale;

Expand All @@ -66,229 +38,7 @@ public TokenStream create(TokenStream tokenStream, Version version) {
protected boolean isMultiTermAware() {
return true;
}
},

// Extended Token Filters
/**
 * Elision token filter, declared with {@code CachingStrategy.ONE}.
 */
ELISION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in an {@code ElisionFilter} configured with
     * {@code FrenchAnalyzer.DEFAULT_ARTICLES}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
    }
},

/**
 * Arabic stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
ARABIC_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in an {@code ArabicStemFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ArabicStemFilter(tokenStream);
    }
},

/**
 * Brazilian stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
BRAZILIAN_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code BrazilianStemFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new BrazilianStemFilter(tokenStream);
    }
},

/**
 * Czech stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
CZECH_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code CzechStemFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new CzechStemFilter(tokenStream);
    }
},

/**
 * Dutch stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
DUTCH_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code SnowballFilter} driven by a
     * freshly constructed {@code DutchStemmer}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        DutchStemmer stemmer = new DutchStemmer();
        return new SnowballFilter(tokenStream, stemmer);
    }
},

/**
 * French stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
FRENCH_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code SnowballFilter} driven by a
     * freshly constructed {@code FrenchStemmer}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        FrenchStemmer stemmer = new FrenchStemmer();
        return new SnowballFilter(tokenStream, stemmer);
    }
},

/**
 * German stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
GERMAN_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code GermanStemFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new GermanStemFilter(tokenStream);
    }
},

/**
 * Russian stemming token filter, declared with {@code CachingStrategy.ONE}.
 */
RUSSIAN_STEM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code SnowballFilter} that is selected by
     * the stemmer name {@code "Russian"} rather than by a stemmer instance.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new SnowballFilter(tokenStream, "Russian");
    }
},

/**
 * Keyword-repeat token filter, declared with {@code CachingStrategy.ONE}.
 */
KEYWORD_REPEAT(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code KeywordRepeatFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new KeywordRepeatFilter(tokenStream);
    }
},

/**
 * Arabic normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
ARABIC_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in an {@code ArabicNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ArabicNormalizationFilter(tokenStream);
    }
},

/**
 * Persian normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
PERSIAN_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code PersianNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new PersianNormalizationFilter(tokenStream);
    }
},

/**
 * Type-as-payload token filter, declared with {@code CachingStrategy.ONE}.
 */
TYPE_AS_PAYLOAD(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code TypeAsPayloadTokenFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new TypeAsPayloadTokenFilter(tokenStream);
    }
},

/**
 * Shingle token filter, declared with {@code CachingStrategy.ONE}.
 */
SHINGLE(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code ShingleFilter} built with its
     * single-argument constructor.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ShingleFilter(tokenStream);
    }
},

/**
 * German normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
GERMAN_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code GermanNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new GermanNormalizationFilter(tokenStream);
    }
},

/**
 * Hindi normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
HINDI_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code HindiNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new HindiNormalizationFilter(tokenStream);
    }
},

/**
 * Indic normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
INDIC_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in an {@code IndicNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new IndicNormalizationFilter(tokenStream);
    }
},

/**
 * Sorani normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
SORANI_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code SoraniNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new SoraniNormalizationFilter(tokenStream);
    }
},

/**
 * Scandinavian normalization token filter, declared with {@code CachingStrategy.ONE}.
 */
SCANDINAVIAN_NORMALIZATION(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code ScandinavianNormalizationFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ScandinavianNormalizationFilter(tokenStream);
    }
},

/**
 * Scandinavian folding token filter, declared with {@code CachingStrategy.ONE}.
 */
SCANDINAVIAN_FOLDING(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code ScandinavianFoldingFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ScandinavianFoldingFilter(tokenStream);
    }
},

/**
 * Apostrophe token filter, declared with {@code CachingStrategy.ONE}.
 */
APOSTROPHE(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in an {@code ApostropheFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new ApostropheFilter(tokenStream);
    }
},

/**
 * CJK width token filter, declared with {@code CachingStrategy.ONE}.
 */
CJK_WIDTH(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code CJKWidthFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new CJKWidthFilter(tokenStream);
    }
},

/**
 * Decimal digit token filter, declared with {@code CachingStrategy.ONE}.
 */
DECIMAL_DIGIT(CachingStrategy.ONE) {
    /**
     * @return {@code true}; this constant reports itself as multi-term aware
     */
    @Override
    protected boolean isMultiTermAware() {
        return true;
    }

    /**
     * Wraps the given stream in a {@code DecimalDigitFilter}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new DecimalDigitFilter(tokenStream);
    }
},

/**
 * CJK bigram token filter, declared with {@code CachingStrategy.ONE}.
 */
CJK_BIGRAM(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code CJKBigramFilter} built with its
     * single-argument constructor.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new CJKBigramFilter(tokenStream);
    }
},

/**
 * Delimited payload token filter, declared with {@code CachingStrategy.ONE}.
 */
DELIMITED_PAYLOAD_FILTER(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code DelimitedPayloadTokenFilter} using the
     * default delimiter and encoder published by
     * {@code DelimitedPayloadTokenFilterFactory}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new DelimitedPayloadTokenFilter(
            tokenStream,
            DelimitedPayloadTokenFilterFactory.DEFAULT_DELIMITER,
            DelimitedPayloadTokenFilterFactory.DEFAULT_ENCODER);
    }
},

/**
 * Token-count limiting filter, declared with {@code CachingStrategy.ONE}.
 */
LIMIT(CachingStrategy.ONE) {
    /**
     * Wraps the given stream in a {@code LimitTokenCountFilter} using the
     * default max token count and consume-all-tokens setting published by
     * {@code LimitTokenCountFilterFactory}.
     *
     * @param tokenStream the stream to filter
     * @param version     not used by this filter
     * @return the wrapping filter
     */
    @Override
    public TokenStream create(TokenStream tokenStream, Version version) {
        return new LimitTokenCountFilter(
            tokenStream,
            LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT,
            LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS);
    }
},

;
};

protected boolean isMultiTermAware() {
return false;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@
import static org.hamcrest.Matchers.equalTo;

public abstract class AbstractTermVectorsTestCase extends ESIntegTestCase {

protected static class TestFieldSetting {
public final String name;
public final boolean storedOffset;
Expand Down Expand Up @@ -211,7 +210,7 @@ protected void createIndexBasedOnFieldSettings(String index, String alias, TestF
Settings.Builder settings = Settings.builder()
.put(indexSettings())
.put("index.analysis.analyzer.tv_test.tokenizer", "standard")
.putArray("index.analysis.analyzer.tv_test.filter", "type_as_payload", "lowercase");
.putArray("index.analysis.analyzer.tv_test.filter", "lowercase");
assertAcked(prepareCreate(index).addMapping("type1", mappingBuilder).setSettings(settings).addAlias(new Alias(alias)));
}

Expand Down Expand Up @@ -395,11 +394,7 @@ protected void validateResponse(TermVectorsResponse esResponse, Fields luceneFie
assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.startOffset(), equalTo(-1));
assertThat("Missing offset test failed" + failDesc, esDocsPosEnum.endOffset(), equalTo(-1));
}
if (field.storedPayloads && testConfig.requestPayloads) {
assertThat("Payload test failed" + failDesc, luceneDocsPosEnum.getPayload(), equalTo(esDocsPosEnum.getPayload()));
} else {
assertThat("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload(), equalTo(null));
}
assertNull("Missing payload test failed" + failDesc, esDocsPosEnum.getPayload());
}
}
assertNull("Es returned terms are done but lucene isn't", luceneTermEnum.next());
Expand Down
Loading