Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -276,6 +276,8 @@ public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
filters.put("sorani_normalization", SoraniNormalizationFilterFactory::new);
filters.put("stemmer_override", requiresAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
filters.put("stemmer", StemmerTokenFilterFactory::new);
filters.put("synonym", requiresAnalysisSettings(SynonymTokenFilterFactory::new));
filters.put("synonym_graph", requiresAnalysisSettings(SynonymGraphTokenFilterFactory::new));
filters.put("trim", TrimTokenFilterFactory::new);
filters.put("truncate", requiresAnalysisSettings(TruncateTokenFilterFactory::new));
filters.put("unique", UniqueTokenFilterFactory::new);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
Expand All @@ -26,16 +26,18 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;

import java.io.IOException;
import java.util.List;
import java.util.function.Function;

public class SynonymGraphTokenFilterFactory extends SynonymTokenFilterFactory {

public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
super(indexSettings, env, analysisRegistry, name, settings);
public SynonymGraphTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
super(indexSettings, env, name, settings);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
Expand All @@ -26,8 +26,13 @@
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.Analysis;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;

import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.List;
Expand All @@ -41,8 +46,8 @@ public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory {
protected final Settings settings;
protected final Environment environment;

public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry,
String name, Settings settings) throws IOException {
public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env,
String name, Settings settings) {
super(indexSettings, name, settings);
this.settings = settings;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
import org.apache.lucene.analysis.reverse.ReverseStringFilterFactory;
import org.apache.lucene.analysis.snowball.SnowballPorterFilterFactory;
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.indices.analysis.AnalysisFactoryTestCase;

import java.util.List;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
* under the License.
*/

package org.elasticsearch.index.analysis;
package org.elasticsearch.analysis.common;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -223,6 +223,68 @@
- match: { tokens.0.token: Foo }
- match: { tokens.1.token: Bar! }

---
# REST test for the `synonym` token filter type: creates index `test` with a filter
# named `my_synonym` using the single rule "car,auto", then analyzes the text
# "what car magazine" with the whitespace tokenizer followed by that filter.
"synonym":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_synonym:
type: synonym
synonyms: ["car,auto"]

- do:
indices.analyze:
index: test
body:
text: what car magazine
tokenizer: whitespace
filter: [ my_synonym ]
# Three input words yield four tokens: `auto` is emitted alongside `car`
# and both are expected at position 1.
- length: { tokens: 4 }
- match: { tokens.0.token: what }
- match: { tokens.0.position: 0 }
- match: { tokens.1.token: car }
- match: { tokens.1.position: 1 }
- match: { tokens.2.token: auto }
- match: { tokens.2.position: 1 }
- match: { tokens.3.token: magazine }
- match: { tokens.3.position: 2 }

---
# REST test for the `synonym_graph` token filter type: creates index `test` with a
# filter named `my_graph_synonym` using the multi-word rule "guinea pig,cavy", then
# analyzes "my guinea pig snores" with the whitespace tokenizer followed by that filter.
"synonym_graph":
- do:
indices.create:
index: test
body:
settings:
analysis:
filter:
my_graph_synonym:
type: synonym_graph
synonyms: [ "guinea pig,cavy" ]

- do:
indices.analyze:
index: test
body:
text: my guinea pig snores
tokenizer: whitespace
filter: [ my_graph_synonym ]
# Four input words yield five tokens: `cavy` is expected at position 1 with
# positionLength 2, alongside `guinea` (position 1) and `pig` (position 2).
- length: { tokens: 5 }
- match: { tokens.0.token: my }
- match: { tokens.1.token: cavy }
- match: { tokens.1.position: 1 }
- match: { tokens.1.positionLength: 2 }
- match: { tokens.2.token: guinea }
- match: { tokens.2.position: 1 }
- match: { tokens.3.token: pig }
- match: { tokens.3.position: 2 }
- match: { tokens.4.token: snores }
- match: { tokens.4.position: 3 }

---
"synonym_graph and flatten_graph":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -158,16 +158,8 @@ public IndexAnalyzers build(IndexSettings indexSettings) throws IOException {

public Map<String, TokenFilterFactory> buildTokenFilterFactories(IndexSettings indexSettings) throws IOException {
final Map<String, Settings> tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER);
Map<String, AnalysisModule.AnalysisProvider<TokenFilterFactory>> tokenFilters = new HashMap<>(this.tokenFilters);
/*
* synonym and synonym_graph differ from every other token filter because they need access to the tokenizer factories for the index.
* Instead of building plugin infrastructure for this, we deliberately treat them as a special case so that the general interface is
* not polluted and internal data structures stay hidden as much as possible.
*/
tokenFilters.put("synonym", requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)));
tokenFilters.put("synonym_graph", requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings)));

return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters);
return buildMapping(Component.FILTER, indexSettings, tokenFiltersSettings,
Collections.unmodifiableMap(this.tokenFilters), prebuiltAnalysis.preConfiguredTokenFilters);
}

public Map<String, TokenizerFactory> buildTokenizerFactories(IndexSettings indexSettings) throws IOException {
Expand Down Expand Up @@ -222,18 +214,7 @@ public AnalysisProvider<TokenFilterFactory> getTokenFilterProvider(String tokenF
if (tokenFilterSettings.containsKey(tokenFilter)) {
Settings currentSettings = tokenFilterSettings.get(tokenFilter);
String typeName = currentSettings.get("type");
/*
* synonym and synonym_graph differ from every other token filter because they need access to the tokenizer factories for the index.
* Instead of building plugin infrastructure for this, we deliberately treat them as a special case so that the general interface is
* not polluted and internal data structures stay hidden as much as possible.
*/
if ("synonym".equals(typeName)) {
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings));
} else if ("synonym_graph".equals(typeName)) {
return requiresAnalysisSettings((is, env, name, settings) -> new SynonymGraphTokenFilterFactory(is, env, this, name, settings));
} else {
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
}
return getAnalysisProvider(Component.FILTER, tokenFilters, tokenFilter, typeName);
} else {
return getTokenFilterProvider(tokenFilter);
}
Expand All @@ -257,19 +238,6 @@ public AnalysisProvider<CharFilterFactory> getCharFilterProvider(String charFilt
}
}

/**
 * Decorates an analysis provider so that it reports that it requires analysis settings.
 *
 * @param provider the provider that actually builds the component
 * @param <T> the type of component the provider builds
 * @return a provider whose {@code get} forwards to {@code provider} and whose
 *         {@code requiresAnalysisSettings()} always returns {@code true}
 */
private static <T> AnalysisModule.AnalysisProvider<T> requiresAnalysisSettings(AnalysisModule.AnalysisProvider<T> provider) {
    final AnalysisModule.AnalysisProvider<T> settingsRequiringProvider = new AnalysisModule.AnalysisProvider<T>() {
        @Override
        public boolean requiresAnalysisSettings() {
            // The only behavior this wrapper adds on top of the delegate.
            return true;
        }

        @Override
        public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException {
            // Pure delegation: all arguments are passed through unchanged.
            return provider.get(indexSettings, environment, name, settings);
        }
    };
    return settingsRequiringProvider;
}

enum Component {
ANALYZER {
@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.FieldType;
Expand Down Expand Up @@ -55,6 +56,7 @@
import org.elasticsearch.index.mapper.TextFieldMapper.TextFieldType;
import org.elasticsearch.index.query.MatchPhraseQueryBuilder;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.index.search.MatchQuery;
import org.elasticsearch.index.shard.IndexShard;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
Expand Down Expand Up @@ -82,10 +84,6 @@ public class TextFieldMapperTests extends ESSingleNodeTestCase {
@Before
public void setup() {
Settings settings = Settings.builder()
.put("index.analysis.filter.mySynonyms.type", "synonym")
.putList("index.analysis.filter.mySynonyms.synonyms", Collections.singletonList("car, auto"))
.put("index.analysis.analyzer.synonym.tokenizer", "standard")
.put("index.analysis.analyzer.synonym.filter", "mySynonyms")
// Stop filter remains in server as it is part of lucene-core
.put("index.analysis.analyzer.my_stop_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.my_stop_analyzer.filter", "stop")
Expand Down Expand Up @@ -739,7 +737,7 @@ public void testFastPhraseMapping() throws IOException {
.endObject()
.startObject("synfield")
.field("type", "text")
.field("analyzer", "synonym")
.field("analyzer", "standard") // will be replaced with MockSynonymAnalyzer
.field("index_phrases", true)
.endObject()
.endObject()
Expand All @@ -766,11 +764,13 @@ public void testFastPhraseMapping() throws IOException {
assertThat(q5,
is(new PhraseQuery.Builder().add(new Term("field", "sparkle")).add(new Term("field", "stopword"), 2).build()));

Query q6 = new MatchPhraseQueryBuilder("synfield", "motor car").toQuery(queryShardContext);
MatchQuery matchQuery = new MatchQuery(queryShardContext);
matchQuery.setAnalyzer(new MockSynonymAnalyzer());
Query q6 = matchQuery.parse(MatchQuery.Type.PHRASE, "synfield", "motor dogs");
assertThat(q6, is(new MultiPhraseQuery.Builder()
.add(new Term[]{
new Term("synfield._index_phrase", "motor car"),
new Term("synfield._index_phrase", "motor auto")})
new Term("synfield._index_phrase", "motor dogs"),
new Term("synfield._index_phrase", "motor dog")})
.build()));

ParsedDocument doc = mapper.parse(SourceToParse.source("test", "type", "1", BytesReference
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,11 +73,7 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

@Before
public void setup() throws IOException {
Settings settings = Settings.builder()
.put("index.analysis.filter.syns.type","synonym")
.putList("index.analysis.filter.syns.synonyms","quick,fast")
.put("index.analysis.analyzer.syns.tokenizer","standard")
.put("index.analysis.analyzer.syns.filter","syns").build();
Settings settings = Settings.builder().build();
IndexService indexService = createIndex("test", settings);
MapperService mapperService = indexService.mapperService();
String mapping = "{\n" +
Expand All @@ -87,11 +83,11 @@ public void setup() throws IOException {
" \"properties\":{\n" +
" \"first\": {\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" \"analyzer\":\"standard\"\n" +
" }," +
" \"last\": {\n" +
" \"type\":\"text\",\n" +
" \"analyzer\":\"syns\"\n" +
" \"analyzer\":\"standard\"\n" +
" }" +
" }" +
" }\n" +
Expand Down Expand Up @@ -221,25 +217,27 @@ public void testMultiMatchCrossFieldsWithSynonyms() throws IOException {
QueryShardContext queryShardContext = indexService.newQueryShardContext(
randomInt(20), null, () -> { throw new UnsupportedOperationException(); }, null);

MultiMatchQuery parser = new MultiMatchQuery(queryShardContext);
parser.setAnalyzer(new MockSynonymAnalyzer());
Map<String, Float> fieldNames = new HashMap<>();
fieldNames.put("name.first", 1.0f);

// check that synonym query is used for a single field
Query parsedQuery =
multiMatchQuery("quick").field("name.first")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
Query parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null);
Term[] terms = new Term[2];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
terms[0] = new Term("name.first", "dog");
terms[1] = new Term("name.first", "dogs");
Query expectedQuery = new SynonymQuery(terms);
assertThat(parsedQuery, equalTo(expectedQuery));

// check that blended term query is used for multiple fields
parsedQuery =
multiMatchQuery("quick").field("name.first").field("name.last")
.type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext);
fieldNames.put("name.last", 1.0f);
parsedQuery = parser.parse(MultiMatchQueryBuilder.Type.CROSS_FIELDS, fieldNames, "dogs", null);
terms = new Term[4];
terms[0] = new Term("name.first", "quick");
terms[1] = new Term("name.first", "fast");
terms[2] = new Term("name.last", "quick");
terms[3] = new Term("name.last", "fast");
terms[0] = new Term("name.first", "dog");
terms[1] = new Term("name.first", "dogs");
terms[2] = new Term("name.last", "dog");
terms[3] = new Term("name.last", "dogs");
float[] boosts = new float[4];
Arrays.fill(boosts, 1.0f);
expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,6 @@
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.SynonymTokenFilterFactory;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;

Expand Down Expand Up @@ -169,8 +167,8 @@ private static String toCamelCase(String s) {
.put("stemmeroverride", MovedToAnalysisCommon.class)
.put("stop", StopTokenFilterFactory.class)
.put("swedishlightstem", MovedToAnalysisCommon.class)
.put("synonym", SynonymTokenFilterFactory.class)
.put("synonymgraph", SynonymGraphTokenFilterFactory.class)
.put("synonym", MovedToAnalysisCommon.class)
.put("synonymgraph", MovedToAnalysisCommon.class)
.put("trim", MovedToAnalysisCommon.class)
.put("truncate", MovedToAnalysisCommon.class)
.put("turkishlowercase", MovedToAnalysisCommon.class)
Expand Down