Skip to content

Commit 68d2fa6

Browse files
committed
apply feedback from @jpoutz
1 parent f13dfcd commit 68d2fa6

File tree

2 files changed

+52
-66
lines changed

2 files changed

+52
-66
lines changed

core/src/main/java/org/elasticsearch/index/mapper/Uid.java

Lines changed: 27 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,8 @@
2222
import org.apache.lucene.util.BytesRef;
2323
import org.apache.lucene.util.BytesRefBuilder;
2424
import org.apache.lucene.util.UnicodeUtil;
25-
import org.elasticsearch.common.io.stream.BytesStreamOutput;
2625
import org.elasticsearch.common.lucene.BytesRefs;
2726

28-
import java.io.ByteArrayInputStream;
2927
import java.util.Arrays;
3028
import java.util.Base64;
3129
import java.util.Collection;
@@ -137,36 +135,36 @@ static boolean isURLBase64WithoutPadding(String id) {
137135
// 'xxx=' and 'xxx' could be considered the same id
138136
final int length = id.length();
139137
switch (length & 0x03) {
140-
case 0:
141-
break;
142-
case 1:
143-
return false;
144-
case 2:
145-
// the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
146-
// so the last symbol only actually uses 8-6=2 bits and can only take 4 values
147-
char last = id.charAt(length - 1);
148-
if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
138+
case 0:
139+
break;
140+
case 1:
149141
return false;
150-
}
151-
break;
152-
case 3:
153-
// The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
154-
// so the last symbol only actually uses 16-12=4 bits and can only take 16 values
155-
last = id.charAt(length - 1);
156-
if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
142+
case 2:
143+
// the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
144+
// so the last symbol only actually uses 8-6=2 bits and can only take 4 values
145+
char last = id.charAt(length - 1);
146+
if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
147+
return false;
148+
}
149+
break;
150+
case 3:
151+
// The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
152+
// so the last symbol only actually uses 16-12=4 bits and can only take 16 values
153+
last = id.charAt(length - 1);
154+
if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
157155
&& last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
158156
&& last != '0' && last != '4' && last != '8') {
159-
return false;
160-
}
161-
break;
162-
default:
163-
// number & 0x03 is always in [0,3]
164-
throw new AssertionError("Impossible case");
157+
return false;
158+
}
159+
break;
160+
default:
161+
// number & 0x03 is always in [0,3]
162+
throw new AssertionError("Impossible case");
165163
}
166164
for (int i = 0; i < length; ++i) {
167165
final char c = id.charAt(i);
168166
final boolean allowed =
169-
(c >= '0' && c <= '9') ||
167+
(c >= '0' && c <= '9') ||
170168
(c >= 'A' && c <= 'Z') ||
171169
(c >= 'a' && c <= 'z') ||
172170
c == '-' || c == '_';
@@ -272,17 +270,17 @@ private static String decodeUtf8Id(byte[] idBytes, int offset, int length) {
272270
private static String decodeBase64Id(byte[] idBytes, int offset, int length) {
273271
assert Byte.toUnsignedInt(idBytes[offset]) <= BASE64_ESCAPE;
274272
if (Byte.toUnsignedInt(idBytes[offset]) == BASE64_ESCAPE) {
275-
idBytes = Arrays.copyOfRange(idBytes, offset + 1, length);
276-
} else if (idBytes.length != length || offset != 0) {
277-
idBytes = Arrays.copyOfRange(idBytes, offset, length);
273+
idBytes = Arrays.copyOfRange(idBytes, offset + 1, offset + length);
274+
} else if ((idBytes.length == length && offset == 0) == false) { // no need to copy if it's not a slice
275+
idBytes = Arrays.copyOfRange(idBytes, offset, offset + length);
278276
}
279277
return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
280278
}
281279

282280
/** Decode an indexed id back to its original form.
283281
* @see #encodeId */
284282
public static String decodeId(byte[] idBytes) {
285-
return decodeId(idBytes, 0, idBytes.length);
283+
return decodeId(idBytes, 0, idBytes.length);
286284
}
287285

288286
/** Decode an indexed id back to its original form.

core/src/main/java/org/elasticsearch/index/shard/ShardSplittingQuery.java

Lines changed: 25 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,13 @@
3131
import org.apache.lucene.search.IndexSearcher;
3232
import org.apache.lucene.search.Query;
3333
import org.apache.lucene.search.Scorer;
34+
import org.apache.lucene.search.TwoPhaseIterator;
3435
import org.apache.lucene.search.Weight;
3536
import org.apache.lucene.util.BitSetIterator;
3637
import org.apache.lucene.util.Bits;
3738
import org.apache.lucene.util.BytesRef;
3839
import org.apache.lucene.util.FixedBitSet;
40+
import org.elasticsearch.Version;
3941
import org.elasticsearch.cluster.metadata.IndexMetaData;
4042
import org.elasticsearch.cluster.routing.OperationRouting;
4143
import org.elasticsearch.index.mapper.IdFieldMapper;
@@ -56,6 +58,10 @@ final class ShardSplittingQuery extends Query {
5658
private final int shardId;
5759

5860
ShardSplittingQuery(IndexMetaData indexMetaData, int shardId) {
61+
if (indexMetaData.getCreationVersion().before(Version.V_6_0_0_rc2)) {
62+
throw new IllegalArgumentException("Splitting query can only be executed on an index created with version "
63+
+ Version.V_6_0_0_rc2 + " or higher");
64+
}
5965
this.indexMetaData = indexMetaData;
6066
this.shardId = shardId;
6167
}
@@ -87,7 +93,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
8793
Bits liveDocs = leafReader.getLiveDocs();
8894
Visitor visitor = new Visitor();
8995
return new ConstantScoreScorer(this, score(),
90-
new RoutingPartitionedDocIdSetIterator(leafReader, liveDocs, visitor));
96+
new RoutingPartitionedDocIdSetIterator(leafReader, visitor));
9197
} else {
9298
// in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
9399
findSplitDocs(RoutingFieldMapper.NAME, ref -> {
@@ -111,6 +117,8 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
111117
}
112118
return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
113119
}
120+
121+
114122
};
115123
}
116124

@@ -121,12 +129,12 @@ public String toString(String field) {
121129

122130
@Override
123131
public boolean equals(Object o) {
124-
return sameClassAs(o);
132+
throw new UnsupportedOperationException("only use this query for deleting documents");
125133
}
126134

127135
@Override
128136
public int hashCode() {
129-
return classHash();
137+
throw new UnsupportedOperationException("only use this query for deleting documents");
130138
}
131139

132140
private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard,
@@ -184,64 +192,44 @@ public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
184192

185193
@Override
186194
public Status needsField(FieldInfo fieldInfo) throws IOException {
195+
// we don't support 5.x so no need for the uid field
187196
switch (fieldInfo.name) {
188197
case IdFieldMapper.NAME:
189198
case RoutingFieldMapper.NAME:
190199
leftToVisit--;
191200
return Status.YES;
192201
default:
193202
return leftToVisit == 0 ? Status.STOP : Status.NO;
194-
195203
}
196204
}
197205
}
198206

199207
/**
200-
* This DISI visits every live doc and selects all docs that don't belong into this
201-
* shard based on their id and rounting value. This is only used in a routing partitioned index.
208+
* This two phase iterator visits every live doc and selects all docs that don't belong into this
209+
* shard based on their id and routing value. This is only used in a routing partitioned index.
202210
*/
203-
private final class RoutingPartitionedDocIdSetIterator extends DocIdSetIterator {
211+
private final class RoutingPartitionedDocIdSetIterator extends TwoPhaseIterator {
204212
private final LeafReader leafReader;
205-
private final Bits liveDocs;
206213
private final Visitor visitor;
207-
private int doc;
208214

209-
RoutingPartitionedDocIdSetIterator(LeafReader leafReader, Bits liveDocs, Visitor visitor) {
215+
RoutingPartitionedDocIdSetIterator(LeafReader leafReader, Visitor visitor) {
216+
super(DocIdSetIterator.all(leafReader.maxDoc())); // we iterate all live-docs
210217
this.leafReader = leafReader;
211-
this.liveDocs = liveDocs;
212218
this.visitor = visitor;
213-
doc = -1;
214-
}
215-
216-
@Override
217-
public int docID() {
218-
return doc;
219-
}
220-
221-
@Override
222-
public int nextDoc() throws IOException {
223-
while (++doc < leafReader.maxDoc()) {
224-
if (liveDocs == null || liveDocs.get(doc)) {
225-
visitor.reset();
226-
leafReader.document(doc, visitor);
227-
int targetShardId = OperationRouting.generateShardId(indexMetaData, visitor.id, visitor.routing);
228-
if (targetShardId != shardId) { // move to next doc if we can keep it
229-
return doc;
230-
}
231-
}
232-
}
233-
return doc = DocIdSetIterator.NO_MORE_DOCS;
234219
}
235220

236221
@Override
237-
public int advance(int target) throws IOException {
238-
while (nextDoc() < target) {}
239-
return doc;
222+
public boolean matches() throws IOException {
223+
int doc = approximation.docID();
224+
visitor.reset();
225+
leafReader.document(doc, visitor);
226+
int targetShardId = OperationRouting.generateShardId(indexMetaData, visitor.id, visitor.routing);
227+
return targetShardId != shardId;
240228
}
241229

242230
@Override
243-
public long cost() {
244-
return leafReader.maxDoc();
231+
public float matchCost() {
232+
return 42; // that's obvious, right?
245233
}
246234
}
247235
}

0 commit comments

Comments
 (0)