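apply feedback from @jpoutz

- Uid.decodeBase64Id: compute the end index of the copied slice as
  offset + length instead of length, so ids stored at a non-zero offset
  are decoded from the right range.
- ShardSplittingQuery: reject indices created before Version.V_6_0_0_rc2,
  make equals()/hashCode() throw UnsupportedOperationException, and
  rewrite RoutingPartitionedDocIdSetIterator as a TwoPhaseIterator whose
  matches() performs the per-document shard check.
- Re-indent the switch in Uid.isURLBase64WithoutPadding.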

s1monw · s1monw · commit 68d2fa6714a8 · 2017-10-12T15:52:26.000+02:00
diff --git a/core/src/main/java/org/elasticsearch/index/mapper/Uid.java b/core/src/main/java/org/elasticsearch/index/mapper/Uid.java
@@ -22,10 +22,8 @@
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.UnicodeUtil;
-import org.elasticsearch.common.io.stream.BytesStreamOutput;
 import org.elasticsearch.common.lucene.BytesRefs;
 
-import java.io.ByteArrayInputStream;
 import java.util.Arrays;
 import java.util.Base64;
 import java.util.Collection;
@@ -137,36 +135,36 @@ static boolean isURLBase64WithoutPadding(String id) {
         // 'xxx=' and 'xxx' could be considered the same id
         final int length = id.length();
         switch (length & 0x03) {
-        case 0:
-            break;
-        case 1:
-            return false;
-        case 2:
-            // the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
-            // so the last symbol only actually uses 8-6=2 bits and can only take 4 values
-            char last = id.charAt(length - 1);
-            if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
+            case 0:
+                break;
+            case 1:
                 return false;
-            }
-            break;
-        case 3:
-            // The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
-            // so the last symbol only actually uses 16-12=4 bits and can only take 16 values
-            last = id.charAt(length - 1);
-            if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
+            case 2:
+                // the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
+                // so the last symbol only actually uses 8-6=2 bits and can only take 4 values
+                char last = id.charAt(length - 1);
+                if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
+                    return false;
+                }
+                break;
+            case 3:
+                // The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
+                // so the last symbol only actually uses 16-12=4 bits and can only take 16 values
+                last = id.charAt(length - 1);
+                if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
                     && last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
                     && last != '0' && last != '4' && last != '8') {
-                return false;
-            }
-            break;
-        default:
-            // number & 0x03 is always in [0,3]
-            throw new AssertionError("Impossible case");
+                    return false;
+                }
+                break;
+            default:
+                // number & 0x03 is always in [0,3]
+                throw new AssertionError("Impossible case");
         }
         for (int i = 0; i < length; ++i) {
             final char c = id.charAt(i);
             final boolean allowed =
-                    (c >= '0' && c <= '9') ||
+                (c >= '0' && c <= '9') ||
                     (c >= 'A' && c <= 'Z') ||
                     (c >= 'a' && c <= 'z') ||
                     c == '-' || c == '_';
@@ -272,17 +270,17 @@ private static String decodeUtf8Id(byte[] idBytes, int offset, int length) {
     private static String decodeBase64Id(byte[] idBytes, int offset, int length) {
         assert Byte.toUnsignedInt(idBytes[offset]) <= BASE64_ESCAPE;
         if (Byte.toUnsignedInt(idBytes[offset]) == BASE64_ESCAPE) {
-            idBytes = Arrays.copyOfRange(idBytes, offset + 1, length);
-        } else if (idBytes.length != length || offset != 0) {
-            idBytes = Arrays.copyOfRange(idBytes, offset, length);
+            idBytes = Arrays.copyOfRange(idBytes, offset + 1, offset + length);
+        } else if ((idBytes.length == length && offset == 0) == false) { // no need to copy if it's not a slice
+            idBytes = Arrays.copyOfRange(idBytes, offset, offset + length);
         }
         return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
     }
 
     /** Decode an indexed id back to its original form.
      *  @see #encodeId */
     public static String decodeId(byte[] idBytes) {
-       return decodeId(idBytes, 0, idBytes.length);
+        return decodeId(idBytes, 0, idBytes.length);
     }
 
     /** Decode an indexed id back to its original form.
diff --git a/core/src/main/java/org/elasticsearch/index/shard/ShardSplittingQuery.java b/core/src/main/java/org/elasticsearch/index/shard/ShardSplittingQuery.java
@@ -31,11 +31,13 @@
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.TwoPhaseIterator;
 import org.apache.lucene.search.Weight;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.FixedBitSet;
+import org.elasticsearch.Version;
 import org.elasticsearch.cluster.metadata.IndexMetaData;
 import org.elasticsearch.cluster.routing.OperationRouting;
 import org.elasticsearch.index.mapper.IdFieldMapper;
@@ -56,6 +58,10 @@ final class ShardSplittingQuery extends Query {
     private final int shardId;
 
     ShardSplittingQuery(IndexMetaData indexMetaData, int shardId) {
+        if (indexMetaData.getCreationVersion().before(Version.V_6_0_0_rc2)) {
+            throw new IllegalArgumentException("Splitting query can only be executed on an index created with version "
+                + Version.V_6_0_0_rc2 + " or higher");
+        }
         this.indexMetaData = indexMetaData;
         this.shardId = shardId;
     }
@@ -87,7 +93,7 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
                         Bits liveDocs = leafReader.getLiveDocs();
                         Visitor visitor = new Visitor();
                         return new ConstantScoreScorer(this, score(),
-                            new RoutingPartitionedDocIdSetIterator(leafReader, liveDocs, visitor));
+                            new RoutingPartitionedDocIdSetIterator(leafReader, visitor));
                     } else {
                         // in the _routing case we first go and find all docs that have a routing value and mark the ones we have to delete
                         findSplitDocs(RoutingFieldMapper.NAME, ref -> {
@@ -111,6 +117,8 @@ public Scorer scorer(LeafReaderContext context) throws IOException {
                 }
                 return new ConstantScoreScorer(this, score(), new BitSetIterator(bitSet, bitSet.length()));
             }
+
+
         };
     }
 
@@ -121,12 +129,12 @@ public String toString(String field) {
 
     @Override
     public boolean equals(Object o) {
-        return sameClassAs(o);
+        throw new UnsupportedOperationException("only use this query for deleting documents");
     }
 
     @Override
     public int hashCode() {
-        return classHash();
+        throw new UnsupportedOperationException("only use this query for deleting documents");
     }
 
     private static void findSplitDocs(String idField, Predicate<BytesRef> includeInShard,
@@ -184,64 +192,44 @@ public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException {
 
         @Override
         public Status needsField(FieldInfo fieldInfo) throws IOException {
+            // we don't support 5.x so no need for the uid field
             switch (fieldInfo.name) {
                 case IdFieldMapper.NAME:
                 case RoutingFieldMapper.NAME:
                     leftToVisit--;
                     return Status.YES;
                 default:
                     return leftToVisit == 0 ? Status.STOP : Status.NO;
-
             }
         }
     }
 
     /**
-     * This DISI visits every live doc and selects all docs that don't belong into this
-     * shard based on their id and rounting value. This is only used in a routing partitioned index.
+     * This two phase iterator visits every live doc and selects all docs that don't belong into this
+     * shard based on their id and routing value. This is only used in a routing partitioned index.
      */
-    private final class RoutingPartitionedDocIdSetIterator extends DocIdSetIterator {
+    private final class RoutingPartitionedDocIdSetIterator extends TwoPhaseIterator {
         private final LeafReader leafReader;
-        private final Bits liveDocs;
         private final Visitor visitor;
-        private int doc;
 
-        RoutingPartitionedDocIdSetIterator(LeafReader leafReader, Bits liveDocs, Visitor visitor) {
+        RoutingPartitionedDocIdSetIterator(LeafReader leafReader, Visitor visitor) {
+            super(DocIdSetIterator.all(leafReader.maxDoc())); // approximation covers every doc id below maxDoc; matches() filters per doc
             this.leafReader = leafReader;
-            this.liveDocs = liveDocs;
             this.visitor = visitor;
-            doc = -1;
-        }
-
-        @Override
-        public int docID() {
-            return doc;
-        }
-
-        @Override
-        public int nextDoc() throws IOException {
-            while (++doc < leafReader.maxDoc()) {
-                if (liveDocs == null || liveDocs.get(doc)) {
-                    visitor.reset();
-                    leafReader.document(doc, visitor);
-                    int targetShardId = OperationRouting.generateShardId(indexMetaData, visitor.id, visitor.routing);
-                    if (targetShardId != shardId) { // move to next doc if we can keep it
-                        return doc;
-                    }
-                }
-            }
-            return doc = DocIdSetIterator.NO_MORE_DOCS;
         }
 
         @Override
-        public int advance(int target) throws IOException {
-            while (nextDoc() < target) {}
-            return doc;
+        public boolean matches() throws IOException {
+            int doc = approximation.docID();
+            visitor.reset();
+            leafReader.document(doc, visitor);
+            int targetShardId = OperationRouting.generateShardId(indexMetaData, visitor.id, visitor.routing);
+            return targetShardId != shardId;
         }
 
         @Override
-        public long cost() {
-            return leafReader.maxDoc();
+        public float matchCost() {
+            return 42; // arbitrary constant; matches() loads the doc's stored fields, so verification is not free
         }
     }
 }