Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.ja.JapaneseAnalyzer;
import org.apache.lucene.analysis.ja.JapanesePartOfSpeechStopFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
Expand All @@ -38,6 +39,8 @@ public KuromojiPartOfSpeechFilterFactory(IndexSettings indexSettings, Environmen
List<String> wordList = Analysis.getWordList(env, settings, "stoptags");
if (wordList != null) {
stopTags.addAll(wordList);
} else {
stopTags.addAll(JapaneseAnalyzer.getDefaultStopTags());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,21 @@ public void testBaseFormFilterFactory() throws IOException {
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
}

/**
 * Verifies that the {@code kuromoji_part_of_speech} token filter strips tokens
 * whose part-of-speech tags are in the stop-tag set: for the input sentence the
 * particles ("が", "ね") are removed, leaving only the content words.
 *
 * @throws IOException if token stream consumption fails
 */
public void testPartOfSpeechFilter() throws IOException {
    TestAnalysis analysis = createTestAnalysis();
    TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_part_of_speech");

    assertThat(tokenFilter, instanceOf(KuromojiPartOfSpeechFilterFactory.class));

    String source = "寿司がおいしいね";
    // lowerCamelCase, consistent with the "expected" naming used by the sibling
    // tests and by assertSimpleTSOutput's parameter (expected_tokens was non-idiomatic)
    String[] expected = new String[]{"寿司", "おいしい"};

    Tokenizer tokenizer = new JapaneseTokenizer(null, true, JapaneseTokenizer.Mode.SEARCH);
    tokenizer.setReader(new StringReader(source));

    assertSimpleTSOutput(tokenFilter.create(tokenizer), expected);
}

public void testReadingFormFilterFactory() throws IOException {
TestAnalysis analysis = createTestAnalysis();
TokenFilterFactory tokenFilter = analysis.tokenFilter.get("kuromoji_rf");
Expand Down Expand Up @@ -208,7 +223,7 @@ public static void assertSimpleTSOutput(TokenStream stream,
int i = 0;
while (stream.incrementToken()) {
assertThat(expected.length, greaterThan(i));
assertThat( "expected different term at index " + i, expected[i++], equalTo(termAttr.toString()));
assertThat("expected different term at index " + i, termAttr.toString(), equalTo(expected[i++]));
}
assertThat("not all tokens produced", i, equalTo(expected.length));
}
Expand Down