|
19 | 19 |
|
20 | 20 | package org.elasticsearch.index.analysis; |
21 | 21 |
|
| 22 | +import org.apache.lucene.analysis.BaseTokenStreamTestCase; |
| 23 | +import org.apache.lucene.analysis.Tokenizer; |
| 24 | +import org.apache.lucene.analysis.core.WhitespaceTokenizer; |
22 | 25 | import org.elasticsearch.Version; |
23 | 26 | import org.elasticsearch.cluster.metadata.IndexMetaData; |
24 | 27 | import org.elasticsearch.common.settings.Settings; |
25 | 28 | import org.elasticsearch.index.Index; |
26 | 29 | import org.elasticsearch.plugin.analysis.AnalysisPhoneticPlugin; |
27 | 30 | import org.elasticsearch.test.ESTestCase; |
28 | 31 | import org.hamcrest.MatcherAssert; |
| 32 | +import org.junit.Before; |
29 | 33 |
|
30 | 34 | import java.io.IOException; |
| 35 | +import java.io.StringReader; |
31 | 36 |
|
32 | 37 | import static org.hamcrest.Matchers.instanceOf; |
33 | 38 |
|
34 | 39 | /** |
35 | 40 | */ |
36 | 41 | public class SimplePhoneticAnalysisTests extends ESTestCase { |
37 | | - public void testPhoneticTokenFilterFactory() throws IOException { |
| 42 | + |
| 43 | + private TestAnalysis analysis; |
| 44 | + |
| 45 | + @Before |
| 46 | + public void setup() throws IOException { |
38 | 47 | String yaml = "/org/elasticsearch/index/analysis/phonetic-1.yml"; |
39 | 48 | Settings settings = Settings.builder().loadFromStream(yaml, getClass().getResourceAsStream(yaml)) |
40 | 49 | .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) |
41 | 50 | .build(); |
42 | | - TestAnalysis analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisPhoneticPlugin()); |
| 51 | + this.analysis = createTestAnalysis(new Index("test", "_na_"), settings, new AnalysisPhoneticPlugin()); |
| 52 | + } |
| 53 | + |
| 54 | + public void testPhoneticTokenFilterFactory() throws IOException { |
43 | 55 | TokenFilterFactory filterFactory = analysis.tokenFilter.get("phonetic"); |
44 | 56 | MatcherAssert.assertThat(filterFactory, instanceOf(PhoneticTokenFilterFactory.class)); |
45 | 57 | } |
| 58 | + |
| 59 | + public void testPhoneticTokenFilterBeiderMorseNoLanguage() throws IOException { |
| 60 | + TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilter"); |
| 61 | + Tokenizer tokenizer = new WhitespaceTokenizer(); |
| 62 | + tokenizer.setReader(new StringReader("ABADIAS")); |
| 63 | + String[] expected = new String[] { "abYdias", "abYdios", "abadia", "abadiaS", "abadias", "abadio", "abadioS", "abadios", "abodia", |
| 64 | + "abodiaS", "abodias", "abodio", "abodioS", "abodios", "avadias", "avadios", "avodias", "avodios", "obadia", "obadiaS", |
| 65 | + "obadias", "obadio", "obadioS", "obadios", "obodia", "obodiaS", "obodias", "obodioS" }; |
| 66 | + BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected); |
| 67 | + } |
| 68 | + |
| 69 | + public void testPhoneticTokenFilterBeiderMorseWithLanguage() throws IOException { |
| 70 | + TokenFilterFactory filterFactory = analysis.tokenFilter.get("beidermorsefilterfrench"); |
| 71 | + Tokenizer tokenizer = new WhitespaceTokenizer(); |
| 72 | + tokenizer.setReader(new StringReader("Rimbault")); |
| 73 | + String[] expected = new String[] { "rimbD", "rimbDlt", "rimba", "rimbalt", "rimbo", "rimbolt", "rimbu", "rimbult", "rmbD", "rmbDlt", |
| 74 | + "rmba", "rmbalt", "rmbo", "rmbolt", "rmbu", "rmbult" }; |
| 75 | + BaseTokenStreamTestCase.assertTokenStreamContents(filterFactory.create(tokenizer), expected); |
| 76 | + } |
46 | 77 | } |
0 commit comments