Skip to content

Commit 7316b66

Browse files
authored
Replace custom sort field with SortedSetSortField and SortedNumericSortField when possible (#23827)
Currently for field sorting we always use a custom sort field and a custom comparator source. Though for numeric fields this custom sort field could be replaced with a standard SortedNumericSortField unless the field is nested especially since we removed the FieldData for numerics. We can also use a SortedSetSortField for string sort based on doc_values when the field is not nested. This change replaces IndexFieldData#comparatorSource with IndexFieldData#sortField that returns a Sorted{Set,Numeric}SortField when possible or a custom sort field when the field sort spec is not handled by the SortedSortFields.
1 parent bdb1cab commit 7316b66

24 files changed

+195
-62
lines changed

core/src/main/java/org/elasticsearch/common/lucene/Lucene.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@
5757
import org.apache.lucene.search.TwoPhaseIterator;
5858
import org.apache.lucene.search.Weight;
5959
import org.apache.lucene.search.grouping.CollapseTopFieldDocs;
60+
import org.apache.lucene.search.SortedNumericSortField;
61+
import org.apache.lucene.search.SortedSetSortField;
6062
import org.apache.lucene.store.Directory;
6163
import org.apache.lucene.store.IOContext;
6264
import org.apache.lucene.store.IndexInput;
@@ -552,7 +554,22 @@ public static void writeSortField(StreamOutput out, SortField sortField) throws
552554
SortField newSortField = new SortField(sortField.getField(), SortField.Type.DOUBLE);
553555
newSortField.setMissingValue(sortField.getMissingValue());
554556
sortField = newSortField;
557+
} else if (sortField.getClass() == SortedSetSortField.class) {
558+
// for multi-valued sort field, we replace the SortedSetSortField with a simple SortField.
559+
// It works because the sort field is only used to merge results from different shards.
560+
SortField newSortField = new SortField(sortField.getField(), SortField.Type.STRING, sortField.getReverse());
561+
newSortField.setMissingValue(sortField.getMissingValue());
562+
sortField = newSortField;
563+
} else if (sortField.getClass() == SortedNumericSortField.class) {
564+
// for multi-valued sort field, we replace the SortedSetSortField with a simple SortField.
565+
// It works because the sort field is only used to merge results from different shards.
566+
SortField newSortField = new SortField(sortField.getField(),
567+
((SortedNumericSortField) sortField).getNumericType(),
568+
sortField.getReverse());
569+
newSortField.setMissingValue(sortField.getMissingValue());
570+
sortField = newSortField;
555571
}
572+
556573
if (sortField.getClass() != SortField.class) {
557574
throw new IllegalArgumentException("Cannot serialize SortField impl [" + sortField + "]");
558575
}

core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ public static MemoryStorageFormat fromString(String string) {
8585
FD loadDirect(LeafReaderContext context) throws Exception;
8686

8787
/**
88-
* Comparator used for sorting.
88+
* Returns the {@link SortField} to used for sorting.
8989
*/
90-
XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested);
90+
SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse);
9191

9292
/**
9393
* Clears any resources associated with this field data.
@@ -136,17 +136,17 @@ public DocIdSetIterator innerDocs(LeafReaderContext ctx) throws IOException {
136136
}
137137

138138
/** Whether missing values should be sorted first. */
139-
protected final boolean sortMissingFirst(Object missingValue) {
139+
public final boolean sortMissingFirst(Object missingValue) {
140140
return "_first".equals(missingValue);
141141
}
142142

143143
/** Whether missing values should be sorted last, this is the default. */
144-
protected final boolean sortMissingLast(Object missingValue) {
144+
public final boolean sortMissingLast(Object missingValue) {
145145
return missingValue == null || "_last".equals(missingValue);
146146
}
147147

148148
/** Return the missing object value according to the reduced type of the comparator. */
149-
protected final Object missingObject(Object missingValue, boolean reversed) {
149+
public final Object missingObject(Object missingValue, boolean reversed) {
150150
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
151151
final boolean min = sortMissingFirst(missingValue) ^ reversed;
152152
switch (reducedType()) {

core/src/main/java/org/elasticsearch/index/fielddata/ordinals/GlobalOrdinalsIndexFieldData.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import org.apache.lucene.index.DirectoryReader;
2222
import org.apache.lucene.index.LeafReaderContext;
23+
import org.apache.lucene.search.SortField;
2324
import org.apache.lucene.util.Accountable;
2425
import org.elasticsearch.common.Nullable;
2526
import org.elasticsearch.index.AbstractIndexComponent;
@@ -68,7 +69,7 @@ public String getFieldName() {
6869
}
6970

7071
@Override
71-
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
72+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
7273
throw new UnsupportedOperationException("no global ordinals sorting yet");
7374
}
7475

core/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractGeoPointDVIndexFieldData.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import org.apache.lucene.index.DocValues;
2323
import org.apache.lucene.index.LeafReaderContext;
24+
import org.apache.lucene.search.SortField;
2425
import org.elasticsearch.common.Nullable;
2526
import org.elasticsearch.index.Index;
2627
import org.elasticsearch.index.IndexSettings;
@@ -43,7 +44,7 @@ public abstract class AbstractGeoPointDVIndexFieldData extends DocValuesIndexFie
4344
}
4445

4546
@Override
46-
public final XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
47+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
4748
throw new IllegalArgumentException("can't sort on geo_point field without using specific sorting feature, like geo_distance");
4849
}
4950

core/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexGeoPointFieldData.java

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
package org.elasticsearch.index.fielddata.plain;
2121

22+
import org.apache.lucene.search.SortField;
2223
import org.apache.lucene.spatial.geopoint.document.GeoPointField;
2324
import org.apache.lucene.util.BytesRef;
2425
import org.apache.lucene.util.BytesRefIterator;
@@ -28,6 +29,7 @@
2829
import org.elasticsearch.common.geo.GeoPoint;
2930
import org.elasticsearch.index.IndexSettings;
3031
import org.elasticsearch.index.fielddata.AtomicGeoPointFieldData;
32+
import org.elasticsearch.index.fielddata.IndexFieldData;
3133
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
3234
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
3335
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
@@ -104,7 +106,7 @@ public GeoPoint next() throws IOException {
104106
}
105107

106108
@Override
107-
public final XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
109+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
108110
throw new IllegalArgumentException("can't sort on geo_point field without using specific sorting feature, like geo_distance");
109111
}
110112

core/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractIndexOrdinalsFieldData.java

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -55,11 +55,6 @@ protected AbstractIndexOrdinalsFieldData(IndexSettings indexSettings, String fie
5555
this.minSegmentSize = minSegmentSize;
5656
}
5757

58-
@Override
59-
public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
60-
return new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
61-
}
62-
6358
@Override
6459
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
6560
if (indexReader.leaves().size() <= 1) {

core/src/main/java/org/elasticsearch/index/fielddata/plain/AbstractLatLonPointDVIndexFieldData.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.lucene.index.FieldInfo;
2525
import org.apache.lucene.index.LeafReader;
2626
import org.apache.lucene.index.LeafReaderContext;
27+
import org.apache.lucene.search.SortField;
2728
import org.elasticsearch.ElasticsearchException;
2829
import org.elasticsearch.common.Nullable;
2930
import org.elasticsearch.index.Index;
@@ -46,8 +47,7 @@ public abstract class AbstractLatLonPointDVIndexFieldData extends DocValuesIndex
4647
}
4748

4849
@Override
49-
public final XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode,
50-
XFieldComparatorSource.Nested nested) {
50+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
5151
throw new IllegalArgumentException("can't sort on geo_point field without using specific sorting feature, like geo_distance");
5252
}
5353

core/src/main/java/org/elasticsearch/index/fielddata/plain/BinaryDVIndexFieldData.java

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,12 @@
2020
package org.elasticsearch.index.fielddata.plain;
2121

2222
import org.apache.lucene.index.LeafReaderContext;
23+
import org.apache.lucene.search.SortField;
24+
import org.apache.lucene.search.SortedSetSortField;
25+
import org.apache.lucene.search.SortedSetSelector;
26+
import org.elasticsearch.common.Nullable;
2327
import org.elasticsearch.index.Index;
2428
import org.elasticsearch.index.fielddata.IndexFieldData;
25-
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
2629
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
2730
import org.elasticsearch.search.MultiValueMode;
2831

@@ -43,7 +46,21 @@ public BinaryDVAtomicFieldData loadDirect(LeafReaderContext context) throws Exce
4346
}
4447

4548
@Override
46-
public org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource comparatorSource(Object missingValue, MultiValueMode sortMode, Nested nested) {
47-
return new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
49+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
50+
XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
51+
/**
52+
* Check if we can use a simple {@link SortedSetSortField} compatible with index sorting and
53+
* returns a custom sort field otherwise.
54+
*/
55+
if (nested != null ||
56+
(sortMode != MultiValueMode.MAX && sortMode != MultiValueMode.MIN) ||
57+
(source.sortMissingFirst(missingValue) == false && source.sortMissingLast(missingValue) == false)) {
58+
return new SortField(getFieldName(), source, reverse);
59+
}
60+
SortField sortField = new SortedSetSortField(fieldName, reverse,
61+
sortMode == MultiValueMode.MAX ? SortedSetSelector.Type.MAX : SortedSetSelector.Type.MIN);
62+
sortField.setMissingValue(source.sortMissingLast(missingValue) ^ reverse ?
63+
SortedSetSortField.STRING_LAST : SortedSetSortField.STRING_FIRST);
64+
return sortField;
4865
}
4966
}

core/src/main/java/org/elasticsearch/index/fielddata/plain/BytesBinaryDVIndexFieldData.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121

2222
import org.apache.lucene.index.DocValues;
2323
import org.apache.lucene.index.LeafReaderContext;
24+
import org.apache.lucene.search.SortField;
2425
import org.elasticsearch.common.Nullable;
2526
import org.elasticsearch.index.Index;
2627
import org.elasticsearch.index.IndexSettings;
@@ -41,7 +42,7 @@ public BytesBinaryDVIndexFieldData(Index index, String fieldName) {
4142
}
4243

4344
@Override
44-
public final XFieldComparatorSource comparatorSource(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested) {
45+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
4546
throw new IllegalArgumentException("can't sort on binary field");
4647
}
4748

core/src/main/java/org/elasticsearch/index/fielddata/plain/IndexIndexFieldData.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,17 +24,21 @@
2424
import org.apache.lucene.index.LeafReaderContext;
2525
import org.apache.lucene.index.RandomAccessOrds;
2626
import org.apache.lucene.index.SortedDocValues;
27+
import org.apache.lucene.search.SortField;
2728
import org.apache.lucene.util.Accountable;
2829
import org.apache.lucene.util.BytesRef;
30+
import org.elasticsearch.common.Nullable;
2931
import org.elasticsearch.index.IndexSettings;
3032
import org.elasticsearch.index.fielddata.AtomicOrdinalsFieldData;
3133
import org.elasticsearch.index.fielddata.IndexFieldData;
3234
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
3335
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
36+
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
3437
import org.elasticsearch.index.mapper.MappedFieldType;
3538
import org.elasticsearch.index.mapper.MapperService;
3639
import org.elasticsearch.index.mapper.TextFieldMapper;
3740
import org.elasticsearch.indices.breaker.CircuitBreakerService;
41+
import org.elasticsearch.search.MultiValueMode;
3842

3943
import java.util.Collection;
4044
import java.util.Collections;
@@ -124,6 +128,12 @@ public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context)
124128
return atomicFieldData;
125129
}
126130

131+
@Override
132+
public SortField sortField(@Nullable Object missingValue, MultiValueMode sortMode, XFieldComparatorSource.Nested nested, boolean reverse) {
133+
final XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
134+
return new SortField(getFieldName(), source, reverse);
135+
}
136+
127137
@Override
128138
public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
129139
return this;

0 commit comments

Comments
 (0)