Merged
2 changes: 1 addition & 1 deletion buildSrc/version.properties
@@ -1,6 +1,6 @@
# When updating elasticsearch, please update 'rest' version in core/src/main/resources/org/elasticsearch/bootstrap/test-framework.policy
elasticsearch = 6.0.0-alpha1
-lucene = 6.4.1
+lucene = 6.5.0-snapshot-f919485

# optional dependencies
spatial4j = 0.6
1 change: 0 additions & 1 deletion core/licenses/lucene-analyzers-common-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-analyzers-common-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+886c1da9adc3347f61ab95ecbf4dbeeaa0e7acb2
1 change: 0 additions & 1 deletion core/licenses/lucene-backward-codecs-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-backward-codecs-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+df9e94f63ad7d9188f14820c435ea1dc3c28d87a
1 change: 0 additions & 1 deletion core/licenses/lucene-core-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-core-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+3539f8dc9c3ed8ebe90afcb3daa2e9afcf5108d1
1 change: 0 additions & 1 deletion core/licenses/lucene-grouping-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-grouping-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+da76338e4f299963da9d7ab33dae7586dfc902c2
1 change: 0 additions & 1 deletion core/licenses/lucene-highlighter-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-highlighter-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+f6318d120236c7ac03fca6bf98825b4cb4347fc8
1 change: 0 additions & 1 deletion core/licenses/lucene-join-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-join-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+68f045ff272e10c307fe25a1867c2948b614b57c
1 change: 0 additions & 1 deletion core/licenses/lucene-memory-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-memory-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+b58a7a15267614a9a14f7cf6257454e0c24b146d
1 change: 0 additions & 1 deletion core/licenses/lucene-misc-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-misc-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+d5f00fcd00fee6906b563d201bc00bdea7a92baa
1 change: 0 additions & 1 deletion core/licenses/lucene-queries-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-queries-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+2664901a494d87e9f4cef65be14cca918da7c4f5
1 change: 0 additions & 1 deletion core/licenses/lucene-queryparser-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-queryparser-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+476a79293f9a15ea1ee5f93684587205d03480d1
1 change: 0 additions & 1 deletion core/licenses/lucene-sandbox-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-sandbox-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+f4dd70223178cca067b0cade4e58c4d82bec87d6
1 change: 0 additions & 1 deletion core/licenses/lucene-spatial-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-spatial-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+72c4ec5d811480164db556b54c7a76bd3ea16bd6
1 change: 0 additions & 1 deletion core/licenses/lucene-spatial-extras-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-spatial-extras-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+f7af3755fdd09df7c258c655aff03ddef9536a04
1 change: 0 additions & 1 deletion core/licenses/lucene-spatial3d-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-spatial3d-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+2bf820109203b990e93a05dade8dcebec6aeb71a
1 change: 0 additions & 1 deletion core/licenses/lucene-suggest-6.4.1.jar.sha1

This file was deleted.

1 change: 1 addition & 0 deletions core/licenses/lucene-suggest-6.5.0-snapshot-f919485.jar.sha1
@@ -0,0 +1 @@
+fc1f32923ee68761ee05051f4ef6f4a4ab3acdec
4 changes: 3 additions & 1 deletion core/src/main/java/org/elasticsearch/Version.java
@@ -112,9 +112,11 @@ public class Version implements Comparable<Version> {
public static final Version V_5_2_1_UNRELEASED = new Version(V_5_2_1_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_4_1);
public static final int V_5_3_0_ID_UNRELEASED = 5030099;
public static final Version V_5_3_0_UNRELEASED = new Version(V_5_3_0_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_4_1);
+public static final int V_5_4_0_ID_UNRELEASED = 5040099;
+public static final Version V_5_4_0_UNRELEASED = new Version(V_5_4_0_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_5_0);
public static final int V_6_0_0_alpha1_ID_UNRELEASED = 6000001;
public static final Version V_6_0_0_alpha1_UNRELEASED =
-new Version(V_6_0_0_alpha1_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_4_1);
+new Version(V_6_0_0_alpha1_ID_UNRELEASED, org.apache.lucene.util.Version.LUCENE_6_5_0);
public static final Version CURRENT = V_6_0_0_alpha1_UNRELEASED;

// unreleased versions must be added to the above list with the suffix _UNRELEASED (with the exception of CURRENT)
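An aside for reviewers, not part of the diff: the numeric IDs above follow the existing Version encoding (major, minor, revision, and a two-digit build packed into one decimal integer, with build 99 conventionally marking unreleased versions), so 5040099 reads as 5.4.0. A quick sanity check under that assumption:

public class VersionIdCheck {
    public static void main(String[] args) {
        int id = 5040099;                // V_5_4_0_ID_UNRELEASED
        int major = id / 1000000;        // 5
        int minor = (id / 10000) % 100;  // 4
        int revision = (id / 100) % 100; // 0
        int build = id % 100;            // 99, i.e. unreleased by convention
        System.out.printf("%d.%d.%d build %d%n", major, minor, revision, build); // 5.4.0 build 99
    }
}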
2 changes: 1 addition & 1 deletion core/src/main/java/org/elasticsearch/index/analysis/FlattenGraphTokenFilterFactory.java
@@ -20,7 +20,7 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.synonym.FlattenGraphFilter;
+import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
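For context: FlattenGraphFilter, which this snapshot relocates from the synonym package to core, squashes a graph token stream (such as the output of SynonymGraphFilter) back into a linear stream that the indexer can consume. A minimal sketch of that usage, assuming the Lucene 6.5 APIs and not taken from this PR:

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.FlattenGraphFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.synonym.SynonymGraphFilter;
import org.apache.lucene.analysis.synonym.SynonymMap;
import org.apache.lucene.util.CharsRef;
import org.apache.lucene.util.CharsRefBuilder;

public class FlattenGraphSketch {
    public static void main(String[] args) throws Exception {
        // "wifi" also matches the multi-word synonym "wireless network".
        SynonymMap.Builder builder = new SynonymMap.Builder(true);
        builder.add(new CharsRef("wifi"),
                SynonymMap.Builder.join(new String[]{"wireless", "network"}, new CharsRefBuilder()), true);
        SynonymMap synonyms = builder.build();

        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("turn on wifi"));

        // SynonymGraphFilter emits a token graph; FlattenGraphFilter makes it
        // safe to index by flattening the graph back to a linear stream.
        TokenStream stream = new SynonymGraphFilter(tokenizer, synonyms, true);
        stream = new FlattenGraphFilter(stream);
        stream.reset();
        while (stream.incrementToken()) {
            // consume tokens here
        }
        stream.end();
        stream.close();
    }
}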
101 changes: 101 additions & 0 deletions core/src/main/java/org/elasticsearch/index/analysis/WordDelimiterGraphTokenFilterFactory.java
Conversation on this file:

Contributor: I think this deserves a separate PR, WDYT? We need to document how this works and make sure that we add all the warnings regarding the restrictions of using this filter in conjunction with others.

Contributor (author): I can do that. I added it because the test that checks whether all analysis components are exposed failed otherwise.

Contributor: Ok, I created #23104 to track the inclusion of this new filter. We can add the documentation and tests in a follow-up.

@@ -0,0 +1,101 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;

import java.util.List;
import java.util.Set;

import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.CATENATE_ALL;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.CATENATE_NUMBERS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.CATENATE_WORDS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.GENERATE_NUMBER_PARTS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.GENERATE_WORD_PARTS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.PRESERVE_ORIGINAL;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT_ON_CASE_CHANGE;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT_ON_NUMERICS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;
import static org.elasticsearch.index.analysis.WordDelimiterTokenFilterFactory.parseTypes;

public class WordDelimiterGraphTokenFilterFactory extends AbstractTokenFilterFactory {

private final byte[] charTypeTable;
private final int flags;
private final CharArraySet protoWords;

public WordDelimiterGraphTokenFilterFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);

// Sample Format for the type table:
// $ => DIGIT
// % => DIGIT
// . => DIGIT
// \u002C => DIGIT
// \u200D => ALPHANUM
List<String> charTypeTableValues = Analysis.getWordList(env, settings, "type_table");
if (charTypeTableValues == null) {
this.charTypeTable = WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE;
} else {
this.charTypeTable = parseTypes(charTypeTableValues);
}
int flags = 0;
// If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
// If set, causes number subwords to be generated: "500-42" => "500" "42"
flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
// If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
// If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
// If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
// 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards)
flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
// If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
// 1, causes "j2se" to be three tokens; "j" "2" "se"
flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
// If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
// If not null, the set of tokens to protect from being delimited
Set<?> protectedWords = Analysis.getWordSet(env, indexSettings.getIndexVersionCreated(), settings, "protected_words");
this.protoWords = protectedWords == null ? null : CharArraySet.copy(protectedWords);
this.flags = flags;
}

@Override
public TokenStream create(TokenStream tokenStream) {
return new WordDelimiterGraphFilter(tokenStream, charTypeTable, flags, protoWords);
}

private int getFlag(int flag, Settings settings, String key, boolean defaultValue) {
if (settings.getAsBoolean(key, defaultValue)) {
return flag;
}
return 0;
}
}
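To make the factory's defaults concrete, here is a small standalone sketch, not part of the PR, that builds a WordDelimiterGraphFilter with the same default flag set the constructor above assembles (the flag constants are the WordDelimiterFilter ones the factory statically imports):

import java.io.StringReader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.GENERATE_NUMBER_PARTS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.GENERATE_WORD_PARTS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT_ON_CASE_CHANGE;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.SPLIT_ON_NUMERICS;
import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE;

public class WordDelimiterGraphSketch {
    public static void main(String[] args) throws Exception {
        // Default flags, mirroring the factory: generate word/number parts,
        // split on case changes and numerics, strip English possessives.
        int flags = GENERATE_WORD_PARTS | GENERATE_NUMBER_PARTS
                | SPLIT_ON_CASE_CHANGE | SPLIT_ON_NUMERICS | STEM_ENGLISH_POSSESSIVE;

        Tokenizer tokenizer = new WhitespaceTokenizer();
        tokenizer.setReader(new StringReader("PowerShot 500-42"));
        try (WordDelimiterGraphFilter filter = new WordDelimiterGraphFilter(
                tokenizer, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, flags, null)) {
            CharTermAttribute term = filter.addAttribute(CharTermAttribute.class);
            filter.reset();
            while (filter.incrementToken()) {
                System.out.println(term); // expected: Power, Shot, 500, 42
            }
            filter.end();
        }
    }
}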
9 changes: 4 additions & 5 deletions core/src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java
@@ -113,7 +113,7 @@ public int getFlag(int flag, Settings settings, String key, boolean defaultValue) {
/**
* parses a list of MappingCharFilter style rules into a custom byte[] type table
*/
-private byte[] parseTypes(Collection<String> rules) {
+static byte[] parseTypes(Collection<String> rules) {
SortedMap<Character, Byte> typeMap = new TreeMap<>();
for (String rule : rules) {
Matcher m = typePattern.matcher(rule);
@@ -137,7 +137,7 @@ private byte[] parseTypes(Collection<String> rules) {
return types;
}

-private Byte parseType(String s) {
+private static Byte parseType(String s) {
if (s.equals("LOWER"))
return WordDelimiterFilter.LOWER;
else if (s.equals("UPPER"))
@@ -154,9 +154,8 @@ else if (s.equals("SUBWORD_DELIM"))
return null;
}

-char[] out = new char[256];
-
-private String parseString(String s) {
+private static String parseString(String s) {
+char[] out = new char[256];
int readPos = 0;
int len = s.length();
int writePos = 0;
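The hunks above make parseTypes and its helpers static so that WordDelimiterGraphTokenFilterFactory can reuse the type_table parser via the static import shown earlier; moving the char[] out buffer from an instance field into parseString removes the last piece of instance state (a shared mutable buffer, incidentally). A hypothetical snippet of the now-shared call, assuming a class in the same package since parseTypes is package-private:

package org.elasticsearch.index.analysis;

import java.util.Arrays;
import java.util.List;

class TypeTableSketch {
    static byte[] customTable() {
        // Same rule syntax as the type_table setting documented above;
        // "\\u200D" is the literal escape that parseString will decode.
        List<String> rules = Arrays.asList("$ => DIGIT", "\\u200D => ALPHANUM");
        return WordDelimiterTokenFilterFactory.parseTypes(rules);
    }
}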
2 changes: 1 addition & 1 deletion core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java
@@ -79,7 +79,7 @@ protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOException {
protected void setScorer(Scorer scorer) {}

@Override
-public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
+public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final boolean sortMissingLast = sortMissingLast(missingValue) ^ reversed;
2 changes: 1 addition & 1 deletion core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/DoubleValuesComparatorSource.java
@@ -64,7 +64,7 @@ protected SortedNumericDoubleValues getValues(LeafReaderContext context) throws IOException {
protected void setScorer(Scorer scorer) {}

@Override
-public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
+public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final double dMissingValue = (Double) missingObject(missingValue, reversed);
2 changes: 1 addition & 1 deletion core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/FloatValuesComparatorSource.java
@@ -56,7 +56,7 @@ public SortField.Type reducedType() {
}

@Override
-public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
+public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final float dMissingValue = (Float) missingObject(missingValue, reversed);
2 changes: 1 addition & 1 deletion core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/LongValuesComparatorSource.java
@@ -55,7 +55,7 @@ public SortField.Type reducedType() {
}

@Override
-public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) throws IOException {
+public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
assert indexFieldData == null || fieldname.equals(indexFieldData.getFieldName());

final Long dMissingValue = (Long) missingObject(missingValue, reversed);
core/src/main/java/org/elasticsearch/search/aggregations/metrics/tophits/InternalTopHits.java
@@ -23,7 +23,6 @@
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
-import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
@@ -102,42 +101,38 @@ public InternalAggregation doReduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
final TopDocs reducedTopDocs;
final TopDocs[] shardDocs;

-try {
-    if (topDocs instanceof TopFieldDocs) {
-        Sort sort = new Sort(((TopFieldDocs) topDocs).fields);
-        shardDocs = new TopFieldDocs[aggregations.size()];
-        for (int i = 0; i < shardDocs.length; i++) {
-            InternalTopHits topHitsAgg = (InternalTopHits) aggregations.get(i);
-            shardDocs[i] = (TopFieldDocs) topHitsAgg.topDocs;
-            shardHits[i] = topHitsAgg.searchHits;
-        }
-        reducedTopDocs = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardDocs);
-    } else {
-        shardDocs = new TopDocs[aggregations.size()];
-        for (int i = 0; i < shardDocs.length; i++) {
-            InternalTopHits topHitsAgg = (InternalTopHits) aggregations.get(i);
-            shardDocs[i] = topHitsAgg.topDocs;
-            shardHits[i] = topHitsAgg.searchHits;
-        }
-        reducedTopDocs = TopDocs.merge(from, size, shardDocs);
-    }
-
-    final int[] tracker = new int[shardHits.length];
-    SearchHit[] hits = new SearchHit[reducedTopDocs.scoreDocs.length];
-    for (int i = 0; i < reducedTopDocs.scoreDocs.length; i++) {
-        ScoreDoc scoreDoc = reducedTopDocs.scoreDocs[i];
-        int position;
-        do {
-            position = tracker[scoreDoc.shardIndex]++;
-        } while (shardDocs[scoreDoc.shardIndex].scoreDocs[position] != scoreDoc);
-        hits[i] = shardHits[scoreDoc.shardIndex].getAt(position);
-    }
-    return new InternalTopHits(name, from, size, reducedTopDocs, new SearchHits(hits, reducedTopDocs.totalHits,
-        reducedTopDocs.getMaxScore()),
-        pipelineAggregators(), getMetaData());
-} catch (IOException e) {
-    throw ExceptionsHelper.convertToElastic(e);
-}
+if (topDocs instanceof TopFieldDocs) {
+    Sort sort = new Sort(((TopFieldDocs) topDocs).fields);
+    shardDocs = new TopFieldDocs[aggregations.size()];
+    for (int i = 0; i < shardDocs.length; i++) {
+        InternalTopHits topHitsAgg = (InternalTopHits) aggregations.get(i);
+        shardDocs[i] = (TopFieldDocs) topHitsAgg.topDocs;
+        shardHits[i] = topHitsAgg.searchHits;
+    }
+    reducedTopDocs = TopDocs.merge(sort, from, size, (TopFieldDocs[]) shardDocs);
+} else {
+    shardDocs = new TopDocs[aggregations.size()];
+    for (int i = 0; i < shardDocs.length; i++) {
+        InternalTopHits topHitsAgg = (InternalTopHits) aggregations.get(i);
+        shardDocs[i] = topHitsAgg.topDocs;
+        shardHits[i] = topHitsAgg.searchHits;
+    }
+    reducedTopDocs = TopDocs.merge(from, size, shardDocs);
+}
+
+final int[] tracker = new int[shardHits.length];
+SearchHit[] hits = new SearchHit[reducedTopDocs.scoreDocs.length];
+for (int i = 0; i < reducedTopDocs.scoreDocs.length; i++) {
+    ScoreDoc scoreDoc = reducedTopDocs.scoreDocs[i];
+    int position;
+    do {
+        position = tracker[scoreDoc.shardIndex]++;
+    } while (shardDocs[scoreDoc.shardIndex].scoreDocs[position] != scoreDoc);
+    hits[i] = shardHits[scoreDoc.shardIndex].getAt(position);
+}
+return new InternalTopHits(name, from, size, reducedTopDocs, new SearchHits(hits, reducedTopDocs.totalHits,
+    reducedTopDocs.getMaxScore()),
+    pipelineAggregators(), getMetaData());
}

@Override
@@ -554,8 +554,7 @@ public SortField.Type reducedType() {
}

@Override
-public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed)
-    throws IOException {
+public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
return new FieldComparator.DoubleComparator(numHits, null, null) {
@Override
protected NumericDocValues getNumericDocValues(LeafReaderContext context, String field)
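One pattern runs through the sorting and top-hits hunks: this Lucene snapshot appears to drop the checked IOException from FieldComparatorSource.newComparator and from TopDocs.merge, which is why the overrides lose their throws clauses and InternalTopHits no longer needs its try/catch (my reading of the diff, not stated in the PR). A minimal sketch, using a hypothetical subclass, of an override against the new signature:

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;

// Sketch only: the override now compiles without "throws IOException".
public class ExampleComparatorSource extends FieldComparatorSource {
    @Override
    public FieldComparator<?> newComparator(String fieldname, int numHits, int sortPos, boolean reversed) {
        throw new UnsupportedOperationException("sketch; a real source returns a FieldComparator");
    }
}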
4 changes: 2 additions & 2 deletions core/src/main/resources/org/elasticsearch/bootstrap/security.policy
@@ -31,7 +31,7 @@ grant codeBase "${codebase.securesm-1.1.jar}" {
//// Very special jar permissions:
//// These are dangerous permissions that we don't want to grant to everything.

grant codeBase "${codebase.lucene-core-6.4.1.jar}" {
grant codeBase "${codebase.lucene-core-6.5.0-snapshot-f919485.jar}" {
// needed to allow MMapDirectory's "unmap hack" (die unmap hack, die)
// java 8 package
permission java.lang.RuntimePermission "accessClassInPackage.sun.misc";
@@ -42,7 +42,7 @@ grant codeBase "${codebase.lucene-core-6.4.1.jar}" {
permission java.lang.RuntimePermission "accessDeclaredMembers";
};

grant codeBase "${codebase.lucene-misc-6.4.1.jar}" {
grant codeBase "${codebase.lucene-misc-6.5.0-snapshot-f919485.jar}" {
// needed to allow shard shrinking to use hard-links if possible via lucenes HardlinkCopyDirectoryWrapper
permission java.nio.file.LinkPermission "hard";
};