From 52d1c28923f1fa23809a156e33b3a8407eda72f5 Mon Sep 17 00:00:00 2001 From: Claudio Bley Date: Tue, 12 Sep 2017 13:55:18 +0200 Subject: [PATCH 1/2] [TEST] Fix parameter order to `assertThat` call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The order was reversed, as the expected value was given for the actual value and vice versa. This led to a confusing assertion error message: ``` FAILURE 0.04s J1 | KuromojiAnalysisTests.testPartOfSpeechFilter <<< FAILURES! > Throwable #1: java.lang.AssertionError: expected different term at index 1 > Expected: "が" > but: was "おいしい" ``` when the string "が" was actually not expected. --- .../org/elasticsearch/index/analysis/KuromojiAnalysisTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index c0271c997849f..6f3298f1b46be 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -208,7 +208,7 @@ public static void assertSimpleTSOutput(TokenStream stream, int i = 0; while (stream.incrementToken()) { assertThat(expected.length, greaterThan(i)); - assertThat( "expected different term at index " + i, expected[i++], equalTo(termAttr.toString())); + assertThat("expected different term at index " + i, termAttr.toString(), equalTo(expected[i++])); } assertThat("not all tokens produced", i, equalTo(expected.length)); } From 7b4e0f7f5a816a77007c56fcfaa15120f0943c1a Mon Sep 17 00:00:00 2001 From: Claudio Bley Date: Tue, 12 Sep 2017 13:55:10 +0200 Subject: [PATCH 2/2] Use default stop-tags for Kuromoji part-of-speech filter * add new test which checks that part-of-speech tokens are removed when using the kuromoji_part_of_speech filter * initialize the default stop-tags in `KuromojiPartOfSpeechFilterFactory` if the `stoptags` are not given in the config --- .../KuromojiPartOfSpeechFilterFactory.java | 3 +++ .../index/analysis/KuromojiAnalysisTests.java | 15 +++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java index e3a58360e9b5f..bea12470cb026 100644 --- a/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java +++ b/plugins/analysis-kuromoji/src/main/java/org/elasticsearch/index/analysis/KuromojiPartOfSpeechFilterFactory.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.ja.JapaneseAnalyzer; import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; @@ -38,6 +39,8 @@ public KuromojiPartOfSpeechFilterFactory(IndexSettings indexSettings, Environmen List wordList = Analysis.getWordList(env, settings, "stoptags"); if (wordList != null) { stopTags.addAll(wordList); + } else { + stopTags.addAll(JapaneseAnalyzer.getDefaultStopTags()); } } diff --git a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java index 6f3298f1b46be..1be8a22fb2aaf 100644 --- a/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java +++ b/plugins/analysis-kuromoji/src/test/java/org/elasticsearch/index/analysis/KuromojiAnalysisTests.java @@ -93,6 +93,21 @@ public void testBaseFormFilterFactory() throws IOException { assertSimpleTSOutput(tokenFilter.create(tokenizer), expected); } + public void testPartOfSpeechFilter() throws IOException { + TestAnalysis analysis = createTestAnalysis(); + TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_part_of_speech"); + + assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class)); + + String source = "寿司がおいしいね"; + String[] expected_tokens = new String[]{"寿司", "おいしい"}; + + Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH); + tokenizer.setReader(new StringReader(source)); + + assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens); + } + public void testReadingFormFilterFactory() throws IOException { TestAnalysis analysis = createTestAnalysis(); TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_rf");