From f660b393719654b9f2c2fe6e9bbabd41e2e179d9 Mon Sep 17 00:00:00 2001 From: olcbean Date: Thu, 16 Nov 2017 09:26:20 +0100 Subject: [PATCH 1/3] replacing levenstein with levensHtein --- .../suggest/phrase/DirectCandidateGeneratorBuilder.java | 4 ++-- .../search/suggest/term/TermSuggestionBuilder.java | 8 ++++---- .../suggest/phrase/DirectCandidateGeneratorTests.java | 4 ++-- .../search/suggest/term/StringDistanceImplTests.java | 8 ++++---- .../search/suggest/term/TermSuggestionBuilderTests.java | 2 +- docs/reference/search/suggesters/term-suggest.asciidoc | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java index 52d1126e43490..ca67362cdc343 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java @@ -211,7 +211,7 @@ String sort() { * string distance for terms inside the index. *
  • damerau_levenshtein - String distance algorithm * based on Damerau-Levenshtein algorithm. - *
  • levenstein - String distance algorithm based on + *
  • levenshtein - String distance algorithm based on * Levenstein edit distance algorithm. *
  • jarowinkler - String distance algorithm based on * Jaro-Winkler algorithm. @@ -464,7 +464,7 @@ private static StringDistance resolveDistance(String distanceVal) { return DirectSpellChecker.INTERNAL_LEVENSHTEIN; } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { return new LuceneLevenshteinDistance(); - } else if ("levenstein".equals(distanceVal)) { + } else if ("levenshtein".equals(distanceVal)) { return new LevensteinDistance(); // TODO Jaro and Winkler are 2 people - so apply same naming logic // as damerau_levenshtein diff --git a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java index f701ff3642624..9390d94039264 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java @@ -214,7 +214,7 @@ public SortBy sort() { * string distance for terms inside the index. *
  • damerau_levenshtein - String distance algorithm based on * Damerau-Levenshtein algorithm. - *
  • levenstein - String distance algorithm based on + *
  • levenshtein - String distance algorithm based on * Levenstein edit distance algorithm. *
  • jarowinkler - String distance algorithm based on * Jaro-Winkler algorithm. @@ -544,7 +544,7 @@ public StringDistance toLucene() { } }, /** String distance algorithm based on Levenstein edit distance algorithm. */ - LEVENSTEIN { + LEVENSHTEIN { @Override public StringDistance toLucene() { return new LevensteinDistance(); @@ -583,8 +583,8 @@ public static StringDistanceImpl resolve(final String str) { case "damerau_levenshtein": case "damerauLevenshtein": return DAMERAU_LEVENSHTEIN; - case "levenstein": - return LEVENSTEIN; + case "levenshtein": + return LEVENSHTEIN; case "ngram": return NGRAM; case "jarowinkler": diff --git a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java index c92fba09d8cb4..4a299efbda22b 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java @@ -89,7 +89,7 @@ private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBu mutators.add(() -> mutation.preFilter(original.preFilter() == null ? "preFilter" : original.preFilter() + "_other")); mutators.add(() -> mutation.sort(original.sort() == null ? "score" : original.sort() + "_other")); mutators.add( - () -> mutation.stringDistance(original.stringDistance() == null ? "levenstein" : original.stringDistance() + "_other")); + () -> mutation.stringDistance(original.stringDistance() == null ? "levenshtein" : original.stringDistance() + "_other")); mutators.add(() -> mutation.suggestMode(original.suggestMode() == null ? "missing" : original.suggestMode() + "_other")); return randomFrom(mutators).get(); } @@ -189,7 +189,7 @@ public static DirectCandidateGeneratorBuilder randomCandidateGenerator() { maybeSet(generator::postFilter, randomAlphaOfLengthBetween(1, 20)); maybeSet(generator::size, randomIntBetween(1, 20)); maybeSet(generator::sort, randomFrom("score", "frequency")); - maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenstein", "jarowinkler", "ngram")); + maybeSet(generator::stringDistance, randomFrom("internal", "damerau_levenshtein", "levenshtein", "jarowinkler", "ngram")); maybeSet(generator::suggestMode, randomFrom("missing", "popular", "always")); return generator; } diff --git a/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java b/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java index b94b42741b071..04c7be55f66ce 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java @@ -38,7 +38,7 @@ public StringDistanceImplTests() { public void testValidOrdinals() { assertThat(StringDistanceImpl.INTERNAL.ordinal(), equalTo(0)); assertThat(StringDistanceImpl.DAMERAU_LEVENSHTEIN.ordinal(), equalTo(1)); - assertThat(StringDistanceImpl.LEVENSTEIN.ordinal(), equalTo(2)); + assertThat(StringDistanceImpl.LEVENSHTEIN.ordinal(), equalTo(2)); assertThat(StringDistanceImpl.JAROWINKLER.ordinal(), equalTo(3)); assertThat(StringDistanceImpl.NGRAM.ordinal(), equalTo(4)); } @@ -47,7 +47,7 @@ public void testValidOrdinals() { public void testFromString() { assertThat(StringDistanceImpl.resolve("internal"), equalTo(StringDistanceImpl.INTERNAL)); assertThat(StringDistanceImpl.resolve("damerau_levenshtein"), equalTo(StringDistanceImpl.DAMERAU_LEVENSHTEIN)); - assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSTEIN)); + assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN)); assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER)); assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM)); final String doesntExist = "doesnt_exist"; @@ -68,7 +68,7 @@ public void testFromString() { public void testWriteTo() throws IOException { assertWriteToStream(StringDistanceImpl.INTERNAL, 0); assertWriteToStream(StringDistanceImpl.DAMERAU_LEVENSHTEIN, 1); - assertWriteToStream(StringDistanceImpl.LEVENSTEIN, 2); + assertWriteToStream(StringDistanceImpl.LEVENSHTEIN, 2); assertWriteToStream(StringDistanceImpl.JAROWINKLER, 3); assertWriteToStream(StringDistanceImpl.NGRAM, 4); } @@ -77,7 +77,7 @@ public void testWriteTo() throws IOException { public void testReadFrom() throws IOException { assertReadFromStream(0, StringDistanceImpl.INTERNAL); assertReadFromStream(1, StringDistanceImpl.DAMERAU_LEVENSHTEIN); - assertReadFromStream(2, StringDistanceImpl.LEVENSTEIN); + assertReadFromStream(2, StringDistanceImpl.LEVENSHTEIN); assertReadFromStream(3, StringDistanceImpl.JAROWINKLER); assertReadFromStream(4, StringDistanceImpl.NGRAM); } diff --git a/core/src/test/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilderTests.java b/core/src/test/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilderTests.java index e366a5c45089f..1e18fc1253bcb 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilderTests.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilderTests.java @@ -99,7 +99,7 @@ private static StringDistanceImpl randomStringDistance() { switch (randomVal) { case 0: return StringDistanceImpl.INTERNAL; case 1: return StringDistanceImpl.DAMERAU_LEVENSHTEIN; - case 2: return StringDistanceImpl.LEVENSTEIN; + case 2: return StringDistanceImpl.LEVENSHTEIN; case 3: return StringDistanceImpl.JAROWINKLER; case 4: return StringDistanceImpl.NGRAM; default: throw new IllegalArgumentException("No string distance algorithm with an ordinal of " + randomVal); diff --git a/docs/reference/search/suggesters/term-suggest.asciidoc b/docs/reference/search/suggesters/term-suggest.asciidoc index f76b17e0ed298..d8feeaf1760d5 100644 --- a/docs/reference/search/suggesters/term-suggest.asciidoc +++ b/docs/reference/search/suggesters/term-suggest.asciidoc @@ -116,7 +116,7 @@ doesn't take the query into account that is part of request. for comparing string distance for terms inside the index. `damerau_levenshtein` - String distance algorithm based on Damerau-Levenshtein algorithm. - `levenstein` - String distance algorithm based on Levenstein edit distance + `levenshtein` - String distance algorithm based on Levenshtein edit distance algorithm. `jarowinkler` - String distance algorithm based on Jaro-Winkler algorithm. `ngram` - String distance algorithm based on character n-grams. From 0d89c26830ca10af4ca0a92ba1701129727a90f2 Mon Sep 17 00:00:00 2001 From: olcbean Date: Fri, 17 Nov 2017 14:12:48 +0100 Subject: [PATCH 2/3] deprecate `levestein` --- .../DirectCandidateGeneratorBuilder.java | 12 ++++++++-- .../suggest/term/TermSuggestionBuilder.java | 12 ++++++++-- .../phrase/DirectCandidateGeneratorTests.java | 7 ++++++ .../suggest/term/StringDistanceImplTests.java | 23 +++++++++---------- 4 files changed, 38 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java index ca67362cdc343..b5d4ff716e5c3 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorBuilder.java @@ -31,6 +31,8 @@ import org.elasticsearch.common.ParseField; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; @@ -45,6 +47,9 @@ public final class DirectCandidateGeneratorBuilder implements CandidateGenerator { + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger( + Loggers.getLogger(DirectCandidateGeneratorBuilder.class)); + private static final String TYPE = "direct_generator"; public static final ParseField DIRECT_GENERATOR_FIELD = new ParseField(TYPE); @@ -212,7 +217,7 @@ String sort() { *
  • damerau_levenshtein - String distance algorithm * based on Damerau-Levenshtein algorithm. *
  • levenshtein - String distance algorithm based on - * Levenstein edit distance algorithm. + * Levenshtein edit distance algorithm. *
  • jarowinkler - String distance algorithm based on * Jaro-Winkler algorithm. *
  • ngram - String distance algorithm based on character @@ -458,12 +463,15 @@ private static SuggestMode resolveSuggestMode(String suggestMode) { } } - private static StringDistance resolveDistance(String distanceVal) { + static StringDistance resolveDistance(String distanceVal) { distanceVal = distanceVal.toLowerCase(Locale.US); if ("internal".equals(distanceVal)) { return DirectSpellChecker.INTERNAL_LEVENSHTEIN; } else if ("damerau_levenshtein".equals(distanceVal) || "damerauLevenshtein".equals(distanceVal)) { return new LuceneLevenshteinDistance(); + } else if ("levenstein".equals(distanceVal)) { + DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]"); + return new LevensteinDistance(); } else if ("levenshtein".equals(distanceVal)) { return new LevensteinDistance(); // TODO Jaro and Winkler are 2 people - so apply same naming logic diff --git a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java index 9390d94039264..e8c8d6c3ae195 100644 --- a/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java +++ b/core/src/main/java/org/elasticsearch/search/suggest/term/TermSuggestionBuilder.java @@ -30,6 +30,8 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.query.QueryShardContext; @@ -66,6 +68,9 @@ * global options, but are only applicable for this suggestion. */ public class TermSuggestionBuilder extends SuggestionBuilder { + + private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(TermSuggestionBuilder.class)); + private static final String SUGGESTION_NAME = "term"; private SuggestMode suggestMode = SuggestMode.MISSING; @@ -215,7 +220,7 @@ public SortBy sort() { *
  • damerau_levenshtein - String distance algorithm based on * Damerau-Levenshtein algorithm. *
  • levenshtein - String distance algorithm based on - * Levenstein edit distance algorithm. + * Levenshtein edit distance algorithm. *
  • jarowinkler - String distance algorithm based on * Jaro-Winkler algorithm. *
  • ngram - String distance algorithm based on character @@ -543,7 +548,7 @@ public StringDistance toLucene() { return new LuceneLevenshteinDistance(); } }, - /** String distance algorithm based on Levenstein edit distance algorithm. */ + /** String distance algorithm based on Levenshtein edit distance algorithm. */ LEVENSHTEIN { @Override public StringDistance toLucene() { @@ -583,6 +588,9 @@ public static StringDistanceImpl resolve(final String str) { case "damerau_levenshtein": case "damerauLevenshtein": return DAMERAU_LEVENSHTEIN; + case "levenstein": + DEPRECATION_LOGGER.deprecated("Deprecated distance [levenstein] used, replaced by [levenshtein]"); + return LEVENSHTEIN; case "levenshtein": return LEVENSHTEIN; case "ngram": diff --git a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java index 4a299efbda22b..998ea930aaca1 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.search.suggest.phrase; +import org.apache.lucene.search.spell.LevensteinDistance; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.xcontent.ToXContent; @@ -38,6 +39,7 @@ import java.util.function.Supplier; import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; +import static org.hamcrest.Matchers.equalTo; public class DirectCandidateGeneratorTests extends ESTestCase { private static final int NUMBER_OF_RUNS = 20; @@ -65,6 +67,11 @@ public void testEqualsAndHashcode() throws IOException { } } + public void testLevensteinDeprecation() { + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), equalTo(new LevensteinDistance())); + assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]"); + } + private static DirectCandidateGeneratorBuilder mutate(DirectCandidateGeneratorBuilder original) throws IOException { DirectCandidateGeneratorBuilder mutation = copy(original); List> mutators = new ArrayList<>(); diff --git a/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java b/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java index 04c7be55f66ce..42f1928d1dcff 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/term/StringDistanceImplTests.java @@ -20,10 +20,10 @@ package org.elasticsearch.search.suggest.term; import org.elasticsearch.common.io.stream.AbstractWriteableEnumTestCase; +import org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl; import java.io.IOException; -import static org.elasticsearch.search.suggest.term.TermSuggestionBuilder.StringDistanceImpl; import static org.hamcrest.Matchers.equalTo; /** @@ -50,18 +50,17 @@ public void testFromString() { assertThat(StringDistanceImpl.resolve("levenshtein"), equalTo(StringDistanceImpl.LEVENSHTEIN)); assertThat(StringDistanceImpl.resolve("jarowinkler"), equalTo(StringDistanceImpl.JAROWINKLER)); assertThat(StringDistanceImpl.resolve("ngram"), equalTo(StringDistanceImpl.NGRAM)); + final String doesntExist = "doesnt_exist"; - try { - StringDistanceImpl.resolve(doesntExist); - fail("StringDistanceImpl should not have an element " + doesntExist); - } catch (IllegalArgumentException e) { - } - try { - StringDistanceImpl.resolve(null); - fail("StringDistanceImpl.resolve on a null value should throw an exception."); - } catch (NullPointerException e) { - assertThat(e.getMessage(), equalTo("Input string is null")); - } + expectThrows(IllegalArgumentException.class, () -> StringDistanceImpl.resolve(doesntExist)); + + NullPointerException e = expectThrows(NullPointerException.class, () -> StringDistanceImpl.resolve(null)); + assertThat(e.getMessage(), equalTo("Input string is null")); + } + + public void testLevensteinDeprecation() { + assertThat(StringDistanceImpl.resolve("levenstein"), equalTo(StringDistanceImpl.LEVENSHTEIN)); + assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]"); } @Override From 3f281a7fa13cc772997083d30073e037ee9378b1 Mon Sep 17 00:00:00 2001 From: olcbean Date: Sat, 18 Nov 2017 17:53:35 +0100 Subject: [PATCH 3/3] added tests --- .../phrase/DirectCandidateGeneratorTests.java | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java index 998ea930aaca1..25736c8fec9fd 100644 --- a/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java +++ b/core/src/test/java/org/elasticsearch/search/suggest/phrase/DirectCandidateGeneratorTests.java @@ -19,7 +19,11 @@ package org.elasticsearch.search.suggest.phrase; +import org.apache.lucene.search.spell.DirectSpellChecker; +import org.apache.lucene.search.spell.JaroWinklerDistance; import org.apache.lucene.search.spell.LevensteinDistance; +import org.apache.lucene.search.spell.LuceneLevenshteinDistance; +import org.apache.lucene.search.spell.NGramDistance; import org.elasticsearch.common.ParsingException; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; import org.elasticsearch.common.xcontent.ToXContent; @@ -40,6 +44,7 @@ import static org.elasticsearch.test.EqualsHashCodeTestUtils.checkEqualsAndHashCode; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.core.IsInstanceOf.instanceOf; public class DirectCandidateGeneratorTests extends ESTestCase { private static final int NUMBER_OF_RUNS = 20; @@ -67,8 +72,19 @@ public void testEqualsAndHashcode() throws IOException { } } + public void testFromString() { + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("internal"), equalTo(DirectSpellChecker.INTERNAL_LEVENSHTEIN)); + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("damerau_levenshtein"), instanceOf(LuceneLevenshteinDistance.class)); + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenshtein"), instanceOf(LevensteinDistance.class)); + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("jaroWinkler"), instanceOf(JaroWinklerDistance.class)); + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("ngram"), instanceOf(NGramDistance.class)); + + expectThrows(IllegalArgumentException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance("doesnt_exist")); + expectThrows(NullPointerException.class, () -> DirectCandidateGeneratorBuilder.resolveDistance(null)); + } + public void testLevensteinDeprecation() { - assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), equalTo(new LevensteinDistance())); + assertThat(DirectCandidateGeneratorBuilder.resolveDistance("levenstein"), instanceOf(LevensteinDistance.class)); assertWarnings("Deprecated distance [levenstein] used, replaced by [levenshtein]"); }