Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,19 @@
*/
public class ClassicSimilarityProvider extends AbstractSimilarityProvider {

private final Version indexCreatedVersion;
private final ClassicSimilarity similarity = new ClassicSimilarity();

public ClassicSimilarityProvider(String name, Settings settings, Settings indexSettings) {
super(name);
indexCreatedVersion = Version.indexCreated(indexSettings);
if (indexCreatedVersion.onOrAfter(Version.V_6_0_0_beta2)) {
throw new IllegalArgumentException("The [classic] similarity is disallowed as of 6.0. It is advised that you use the [BM25] " +
"similarity instead which usually provides better scores. In case you really need to keep the same scores as the " +
"[classic] similarity, it is possible to reimplement it using the [scripted] similarity.");
}
boolean discountOverlaps = settings.getAsBooleanLenientForPreEs6Indices(
Version.indexCreated(indexSettings), "discount_overlaps", true, new DeprecationLogger(ESLoggerFactory.getLogger(getClass())));
indexCreatedVersion, "discount_overlaps", true, new DeprecationLogger(ESLoggerFactory.getLogger(getClass())));
this.similarity.setDiscountOverlaps(discountOverlaps);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,17 @@ public final class SimilarityService extends AbstractIndexComponent {
private final Similarity defaultSimilarity;
private final Map<String, SimilarityProvider> similarities;
private static final Map<String, SimilarityProvider.Factory> DEFAULTS;
private static final Map<String, SimilarityProvider.Factory> V5X_DEFAULTS;
public static final Map<String, SimilarityProvider.Factory> BUILT_IN;
static {
Map<String, SimilarityProvider.Factory> defaults = new HashMap<>();
Map<String, SimilarityProvider.Factory> buildIn = new HashMap<>();
defaults.put("classic",
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
defaults.put("BM25",
(name, settings, indexSettings, scriptService) -> new BM25SimilarityProvider(name, settings, indexSettings));
defaults.put("boolean",
(name, settings, indexSettings, scriptService) -> new BooleanSimilarityProvider(name, settings, indexSettings));
// We keep the definition of the classic similarity on 6.x indices so that users get get a nicer exception
// if they try to define a custom classic similarity
buildIn.put("classic",
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
buildIn.put("BM25",
Expand All @@ -70,6 +71,11 @@ public final class SimilarityService extends AbstractIndexComponent {
buildIn.put("scripted", ScriptedSimilarityProvider::new);
DEFAULTS = Collections.unmodifiableMap(defaults);
BUILT_IN = Collections.unmodifiableMap(buildIn);

Map<String, SimilarityProvider.Factory> v5xDefaults = new HashMap<>(defaults);
v5xDefaults.put("classic",
(name, settings, indexSettings, scriptService) -> new ClassicSimilarityProvider(name, settings, indexSettings));
V5X_DEFAULTS = Collections.unmodifiableMap(v5xDefaults);
}

public SimilarityService(IndexSettings indexSettings, ScriptService scriptService,
Expand All @@ -94,8 +100,11 @@ public SimilarityService(IndexSettings indexSettings, ScriptService scriptServic
SimilarityProvider.Factory factory = similarities.getOrDefault(typeName, defaultFactory);
providers.put(name, factory.create(name, providerSettings, indexSettings.getSettings(), scriptService));
}
Map<String, SimilarityProvider.Factory> defaults = indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_0_0_beta2)
? DEFAULTS
: V5X_DEFAULTS;
Map<String, SimilarityProvider> providerMapping = addSimilarities(similaritySettings, indexSettings.getSettings(), scriptService,
DEFAULTS);
defaults);
for (Map.Entry<String, SimilarityProvider> entry : providerMapping.entrySet()) {
// Avoid overwriting custom providers for indices older that v5.0
if (providers.containsKey(entry.getKey()) && indexSettings.getIndexVersionCreated().before(Version.V_5_0_0_alpha1)) {
Expand Down Expand Up @@ -133,7 +142,14 @@ private Map<String, SimilarityProvider> addSimilarities(Map<String, Settings> s
}

public SimilarityProvider getSimilarity(String name) {
return similarities.get(name);
SimilarityProvider provider = similarities.get(name);
if (provider == null && "classic".equals(name)) {
// TODO: remove this special case on 7.x
throw new IllegalArgumentException("The [classic] similarity is disallowed as of 6.0. It is advised that you use the [BM25] " +
"similarity instead which usually provides better scores. In case you really need to keep the same scores as the " +
"[classic] similarity, it is possible to reimplement it using the [scripted] similarity.");
}
return provider;
}

Similarity getDefaultSimilarity() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
package org.elasticsearch.index.similarity;

import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.LMDirichletSimilarity;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.test.ESTestCase;
Expand Down Expand Up @@ -50,10 +50,10 @@ public void testOverrideBuiltInSimilarity() {
}

public void testOverrideDefaultSimilarity() {
Settings settings = Settings.builder().put("index.similarity.default.type", "classic")
Settings settings = Settings.builder().put("index.similarity.default.type", "LMDirichlet")
.build();
IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test", settings);
SimilarityService service = new SimilarityService(indexSettings, null, Collections.emptyMap());
assertTrue(service.getDefaultSimilarity() instanceof ClassicSimilarity);
assertTrue(service.getDefaultSimilarity() instanceof LMDirichletSimilarity);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@
import org.apache.lucene.search.similarities.LMJelinekMercerSimilarity;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.NormalizationH2;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentFactory;
Expand All @@ -43,6 +45,7 @@
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.test.InternalSettingsPlugin;
import org.hamcrest.Matchers;

import java.io.IOException;
import java.util.Collection;
Expand All @@ -59,10 +62,16 @@ protected Collection<Class<? extends Plugin>> getPlugins() {

public void testResolveDefaultSimilarities() {
SimilarityService similarityService = createIndex("foo").similarityService();
assertThat(similarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class));
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> similarityService.getSimilarity("classic").get());
assertThat(e.getMessage(), Matchers.containsString("The [classic] similarity is disallowed"));
assertThat(similarityService.getSimilarity("BM25").get(), instanceOf(BM25Similarity.class));
assertThat(similarityService.getSimilarity("boolean").get(), instanceOf(BooleanSimilarity.class));
assertThat(similarityService.getSimilarity("default"), equalTo(null));

SimilarityService legacySimilarityService = createIndex("bar", Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_5_6_0)
.build()).similarityService();
assertThat(legacySimilarityService.getSimilarity("classic").get(), instanceOf(ClassicSimilarity.class));
}

public void testResolveSimilaritiesFromMapping_classic() throws IOException {
Expand All @@ -76,7 +85,13 @@ public void testResolveSimilaritiesFromMapping_classic() throws IOException {
.put("index.similarity.my_similarity.type", "classic")
.put("index.similarity.my_similarity.discount_overlaps", false)
.build();
IndexService indexService = createIndex("foo", indexSettings);
IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> createIndex("foo", indexSettings));
assertThat(e.getMessage(), Matchers.containsString("The [classic] similarity is disallowed"));

Settings legacyIndexSettings = Settings.builder()
.put(indexSettings)
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_5_6_0).build();
IndexService indexService = createIndex("foo", legacyIndexSettings);
DocumentMapper documentMapper = indexService.mapperService().documentMapperParser().parse("type", new CompressedXContent(mapping));
assertThat(documentMapper.mappers().getMapper("field1").fieldType().similarity(), instanceOf(ClassicSimilarityProvider.class));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public void testCustomBM25Similarity() throws Exception {
.field("type", "text")
.endObject()
.startObject("field2")
.field("similarity", "classic")
.field("similarity", "boolean")
.field("type", "text")
.endObject()
.endObject()
Expand Down
14 changes: 0 additions & 14 deletions docs/reference/index-modules/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -72,20 +72,6 @@ This similarity has the following options:

Type name: `BM25`

[float]
[[classic-similarity]]
==== Classic similarity

The classic similarity that is based on the TF/IDF model. This
similarity has the following option:

`discount_overlaps`::
Determines whether overlap tokens (Tokens with
0 position increment) are ignored when computing norm. By default this
is true, meaning overlap tokens do not count when computing norms.

Type name: `classic`

[float]
[[drf]]
==== DFR similarity
Expand Down
14 changes: 2 additions & 12 deletions docs/reference/mapping/params/similarity.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,6 @@ configuration are:
See {defguide}/pluggable-similarites.html[Pluggable Similarity Algorithms]
for more information.

`classic`::
The TF/IDF algorithm which used to be the default in Elasticsearch and
Lucene. See {defguide}/practical-scoring-function.html[Lucene’s Practical Scoring Function]
for more information.

`boolean`::
A simple boolean similarity, which is used when full-text ranking is not needed
and the score should only be based on whether the query terms match or not.
Expand All @@ -44,13 +39,9 @@ PUT my_index
"default_field": { <1>
"type": "text"
},
"classic_field": {
"type": "text",
"similarity": "classic" <2>
},
"boolean_sim_field": {
"type": "text",
"similarity": "boolean" <3>
"similarity": "boolean" <2>
}
}
}
Expand All @@ -59,5 +50,4 @@ PUT my_index
--------------------------------------------------
// CONSOLE
<1> The `default_field` uses the `BM25` similarity.
<2> The `classic_field` uses the `classic` similarity (ie TF/IDF).
<3> The `boolean_sim_field` uses the `boolean` similarity.
<2> The `boolean_sim_field` uses the `boolean` similarity.
7 changes: 7 additions & 0 deletions docs/reference/migration/migrate_6_0/settings.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,10 @@ they are replaced with `script.allowed_types` and `script.allowed_contexts`.
The `discovery.type` settings no longer supports the values `gce`, `aws` and `ec2`.
Integration with these platforms should be done by setting the `discovery.zen.hosts_provider` setting to
one of those values.

==== Similarity settings

The `classic` similarity has been disallowed on 6.0+ indices. We advise to
use <<bm25,`bm25`>> instead, which usually produces better scores. In case you
absolutely need to keep the same scores, you can reimplement the `classic`
similarity using the new <<scripted_similarity,`scripted`>> similarity.