Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions buildSrc/src/main/resources/checkstyle_suppressions.xml
Original file line number Diff line number Diff line change
Expand Up @@ -705,12 +705,6 @@
<suppress files="modules[/\\]lang-expression[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]script[/\\]expression[/\\]StoredExpressionTests.java" checks="LineLength" />
<suppress files="modules[/\\]lang-painless[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]painless[/\\]ContextExampleTests.java" checks="LineLength" />
<suppress files="modules[/\\]reindex[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]reindex[/\\]TransportUpdateByQueryAction.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IcuCollationTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IcuFoldingTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-icu[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]IndexableBinaryStringTools.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-kuromoji[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]JapaneseStopTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-kuromoji[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]KuromojiAnalysisTests.java" checks="LineLength" />
<suppress files="plugins[/\\]analysis-phonetic[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]analysis[/\\]PhoneticTokenFilterFactory.java" checks="LineLength" />
<suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AbstractAwsTestCase.java" checks="LineLength" />
<suppress files="plugins[/\\]discovery-ec2[/\\]src[/\\]test[/\\]java[/\\]org[/\\]elasticsearch[/\\]discovery[/\\]ec2[/\\]AmazonEC2Mock.java" checks="LineLength" />
<suppress files="plugins[/\\]mapper-murmur3[/\\]src[/\\]main[/\\]java[/\\]org[/\\]elasticsearch[/\\]index[/\\]mapper[/\\]murmur3[/\\]Murmur3FieldMapper.java" checks="LineLength" />
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,16 @@

/**
* An ICU based collation token filter. There are two ways to configure collation:
* <p>The first is simply specifying the locale (defaults to the default locale). The {@code language}
* parameter is the lowercase two-letter ISO-639 code. An additional {@code country} and {@code variant}
* <p>The first is simply specifying the locale (defaults to the default
* locale). The {@code language} parameter is the lowercase two-letter
* ISO-639 code. An additional {@code country} and {@code variant}
* can be provided.
* <p>The second option is to specify collation rules as defined in the <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
* Collation customization</a> chapter in ICU docs. The {@code rules} parameter can either embed the rules definition
* in the settings or refer to an external location (preferably located under the {@code config} location, relative to it).
* <p>The second option is to specify collation rules as defined in the
* <a href="http://www.icu-project.org/userguide/Collate_Customization.html">
* Collation customization</a> chapter in ICU docs. The {@code rules}
* parameter can either embed the rules definition
* in the settings or refer to an external location (preferably located under
* the {@code config} location, relative to it).
*/
public class IcuCollationTokenFilterFactory extends AbstractTokenFilterFactory {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,10 +32,12 @@
* Uses the {@link org.apache.lucene.analysis.icu.ICUFoldingFilter}.
* Applies foldings from UTR#30 Character Foldings.
* <p>
* Can be filtered to handle certain characters in a specified way (see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html)
* Can be filtered to handle certain characters in a specified way
* (see http://icu-project.org/apiref/icu4j/com/ibm/icu/text/UnicodeSet.html)
* E.g. national chars that should be retained (filter : "[^åäöÅÄÖ]").
*
* <p>The {@code unicodeSetFilter} attribute can be used to provide the UnicodeSet for filtering.
* <p>The {@code unicodeSetFilter} attribute can be used to provide the
* UnicodeSet for filtering.
*
* @author kimchy (shay.banon)
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,10 @@ public static void encode(byte[] inputArray, int inputOffset,
codingCase = CODING_CASES[caseNum];

if (inputByteNum + 1 < inputLength) { // codingCase.numBytes must be 3
outputArray[outputCharNum++] = (char) ((((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift) + ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)) & (short) 0x7FFF);
outputArray[outputCharNum++] = (char) (
( ((inputArray[inputByteNum] & 0xFF) << codingCase.initialShift)
+ ((inputArray[inputByteNum + 1] & 0xFF) << codingCase.middleShift)
) & (short) 0x7FFF);
// Add trailing char containing the number of full bytes in final char
outputArray[outputCharNum++] = (char) 1;
} else if (inputByteNum < inputLength) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ public JapaneseStopTokenFilterFactory(IndexSettings indexSettings, Environment e
super(indexSettings, name, settings);
this.ignoreCase = settings.getAsBoolean("ignore_case", false);
this.removeTrailing = settings.getAsBoolean("remove_trailing", true);
this.stopWords = Analysis.parseWords(env, settings, "stopwords", JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
this.stopWords = Analysis.parseWords(env, settings, "stopwords",
JapaneseAnalyzer.getDefaultStopSet(), NAMED_STOP_WORDS, ignoreCase);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,8 @@ public void testKatakanaStemFilter() throws IOException {

// パーティー should be stemmed by default
// (min len) コピー should not be stemmed
String[] expected_tokens_katakana = new String[]{"明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
String[] expected_tokens_katakana = new String[] {
"明後日", "パーティ", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);

tokenFilter = analysis.tokenFilter.get("kuromoji_ks");
Expand All @@ -149,7 +150,8 @@ public void testKatakanaStemFilter() throws IOException {

// パーティー should not be stemmed since min len == 6
// コピー should not be stemmed
expected_tokens_katakana = new String[]{"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
expected_tokens_katakana = new String[] {
"明後日", "パーティー", "に", "行く", "予定", "が", "ある", "図書館", "で", "資料", "を", "コピー", "し", "まし", "た"};
assertSimpleTSOutput(tokenFilter.create(tokenizer), expected_tokens_katakana);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,9 @@ public PhoneticTokenFilterFactory(IndexSettings indexSettings, Environment envir
} else if ("double_metaphone".equalsIgnoreCase(encodername) || "doubleMetaphone".equalsIgnoreCase(encodername)) {
this.encoder = null;
this.maxcodelength = settings.getAsInt("max_code_len", 4);
} else if ("bm".equalsIgnoreCase(encodername) || "beider_morse".equalsIgnoreCase(encodername) || "beidermorse".equalsIgnoreCase(encodername)) {
} else if ("bm".equalsIgnoreCase(encodername)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is a fair bit of camelCase left in this file which would be good to look at at some point but I don't think now is that time.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

++

|| "beider_morse".equalsIgnoreCase(encodername)
|| "beidermorse".equalsIgnoreCase(encodername)) {
this.encoder = null;
this.languageset = settings.getAsList("languageset");
String ruleType = settings.get("rule_type", "approx");
Expand Down