From 6930e8b0e3fba03ecf78578b2df35c2b22d19e08 Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 15 Apr 2020 12:49:34 +0100 Subject: [PATCH 01/10] Respect generational files in recoveryDiff Today `MetadataSnapshot#recoveryDiff` considers the `.liv` file as per-commit rather than per-segment and often transfers them during peer recoveries and snapshot restores. It also considers differences in `.fnm`, `.dvd` and `.dvm` files as indicating a difference in the whole segment, even though these files may be adjusted without changing the segment itself. This commit adjusts this logic to attach these generational files to the segments themselves, allowing Elasticsearch only to transfer them if they are genuinely needed. Closes #55142 Resolves an outstanding `//NORELEASE` action related to #50999. --- .../org/elasticsearch/index/store/Store.java | 153 +++++++++++------- .../elasticsearch/index/store/StoreTests.java | 134 ++++++++++----- .../SearchableSnapshotsIntegTests.java | 3 +- 3 files changed, 183 insertions(+), 107 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index 5aa99e818208f..de7c1d17f36cb 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -34,6 +34,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedChecksum; @@ -53,6 +54,7 @@ import org.elasticsearch.ExceptionsHelper; import org.elasticsearch.common.UUIDs; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.io.Streams; import org.elasticsearch.common.io.stream.BytesStreamOutput; import org.elasticsearch.common.io.stream.StreamInput; @@ -67,7 +69,6 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.AbstractRefCounted; import org.elasticsearch.common.util.concurrent.RefCounted; -import org.elasticsearch.common.util.iterable.Iterables; import org.elasticsearch.core.internal.io.IOUtils; import org.elasticsearch.env.NodeEnvironment; import org.elasticsearch.env.ShardLock; @@ -101,6 +102,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.function.Consumer; +import java.util.function.Predicate; import java.util.zip.CRC32; import java.util.zip.Checksum; @@ -910,8 +912,6 @@ public Map asMap() { return metadata; } - private static final String DEL_FILE_EXTENSION = "del"; // legacy delete file - private static final String LIV_FILE_EXTENSION = "liv"; // lucene 5 delete file private static final String SEGMENT_INFO_EXTENSION = "si"; /** @@ -922,80 +922,110 @@ public Map asMap() { *
     * <li>different: they exist in both snapshots but their they are not identical</li>
     * <li>missing: files that exist in the source but not in the target</li>
     * </ul>
-     * This method groups file into per-segment files and per-commit files. A file is treated as
-     * identical if and on if all files in it's group are identical. On a per-segment level files for a segment are treated
-     * as identical iff:
-     * <ul>
-     * <li>all files in this segment have the same checksum</li>
-     * <li>all files in this segment have the same length</li>
-     * <li>the segments {@code .si} files hashes are byte-identical Note: This is a using a perfect hash function,
-     * The metadata transfers the {@code .si} file content as it's hash</li>
-     * </ul>
-     * <p>
-     * The {@code .si} file contains a lot of diagnostics including a timestamp etc. in the future there might be
-     * unique segment identifiers in there hardening this method further.
+     * Individual files are compared by name, length and checksum. The segment info ({@code *.si}) files and the segments file
+     * ({@code segments_N}) are also checked to be a byte-for-byte match.
      * <p>
-     * The per-commit files handles very similar. A commit is composed of the {@code segments_N} files as well as generational files
-     * like deletes ({@code _x_y.del}) or field-info ({@code _x_y.fnm}) files. On a per-commit level files for a commit are treated
-     * as identical iff:
-     * <ul>
-     * <li>all files belonging to this commit have the same checksum</li>
-     * <li>all files belonging to this commit have the same length</li>
-     * <li>the segments file {@code segments_N} files hashes are byte-identical Note: This is a using a perfect hash function,
-     * The metadata transfers the {@code segments_N} file content as it's hash</li>
-     * </ul>
+     * Files are collected together into a group for each segment plus one group of "per-commit" ({@code segments_N}) files. Each
+     * per-segment group is subdivided into a nongenerational group (most of them) and a generational group (e.g. {@code *.liv},
+     * {@code *.fnm}, {@code *.dvm}, {@code *.dvd} that have been updated by subsequent commits).
      * <p>
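For illustration, here is a minimal self-contained sketch of the grouping rule described in the javadoc above. It is not part of the patch: the file names are hypothetical examples, and it uses the same Lucene helpers (IndexFileNames.parseSegmentName and IndexFileNames.parseGeneration) that the new recoveryDiff implementation below relies on.

import org.apache.lucene.index.IndexFileNames;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class RecoveryDiffGroupingSketch {
    public static void main(String[] args) {
        final List<String> perCommit = new ArrayList<>();
        final Map<String, List<String>> nonGenerationalPerSegment = new HashMap<>();
        final Map<String, List<String>> generationalPerSegment = new HashMap<>();
        for (String file : List.of("segments_5", "_0.si", "_0.cfe", "_0.cfs", "_0_1.liv", "_0_2_Lucene80_0.dvd")) {
            if (file.startsWith("_") == false) {
                perCommit.add(file); // segments_N files form the per-commit group
            } else if (IndexFileNames.parseGeneration(file) == 0) {
                // written together with the segment and never rewritten afterwards
                nonGenerationalPerSegment.computeIfAbsent(IndexFileNames.parseSegmentName(file), k -> new ArrayList<>()).add(file);
            } else {
                // updated by a later commit (deletes, doc-values updates, ...)
                generationalPerSegment.computeIfAbsent(IndexFileNames.parseSegmentName(file), k -> new ArrayList<>()).add(file);
            }
        }
        System.out.println("per-commit:                  " + perCommit);                            // [segments_5]
        System.out.println("segment _0, nongenerational: " + nonGenerationalPerSegment.get("_0")); // [_0.si, _0.cfe, _0.cfs]
        System.out.println("segment _0, generational:    " + generationalPerSegment.get("_0"));    // [_0_1.liv, _0_2_Lucene80_0.dvd]
    }
}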
    - * NOTE: this diff will not contain the {@code segments.gen} file. This file is omitted on recovery. + * For each segment, if any nongenerational files are different then the whole segment is considered to be different and will be + * recovered in full. If all the nongenerational files are the same but any generational files are different then all the + * generational files are considered to be different and will be recovered in full, but the nongenerational files are left alone. + * Finally, if any file is different then all the per-commit files are recovered too. + */ + /* Future work: the {@code *.si} file includes {@link SegmentInfo#getId()} which is a globally unique identifier for the + * nongenerational files in the segment so we could compare that instead of using the files lengths and checksums. We may also get a + * similar ID for the generational files in https://issues.apache.org/jira/browse/LUCENE-9324. + * TODO follow up once this Lucene discussion closes. */ - public RecoveryDiff recoveryDiff(MetadataSnapshot recoveryTargetSnapshot) { + public RecoveryDiff recoveryDiff(final MetadataSnapshot targetSnapshot) { + final List perCommitSourceFiles = new ArrayList<>(); + final Map, List>> perSegmentSourceFiles = new HashMap<>(); + // per segment, a tuple of <> + + for (StoreFileMetadata sourceFile : this) { + if (sourceFile.name().startsWith("_")) { + final String segmentId = IndexFileNames.parseSegmentName(sourceFile.name()); + final long generation = IndexFileNames.parseGeneration(sourceFile.name()); + final Tuple, List> perSegmentTuple = perSegmentSourceFiles + .computeIfAbsent(segmentId, k -> Tuple.tuple(new ArrayList<>(), new ArrayList<>())); + (generation == 0 ? perSegmentTuple.v1() : perSegmentTuple.v2()).add(sourceFile); + } else { + assert sourceFile.name().startsWith(IndexFileNames.SEGMENTS + "_") : "unexpected " + sourceFile; + perCommitSourceFiles.add(sourceFile); + } + } + final List identical = new ArrayList<>(); final List different = new ArrayList<>(); final List missing = new ArrayList<>(); - final Map> perSegment = new HashMap<>(); - final List perCommitStoreFiles = new ArrayList<>(); - - for (StoreFileMetadata meta : this) { - if (IndexFileNames.OLD_SEGMENTS_GEN.equals(meta.name())) { // legacy - continue; // we don't need that file at all + final List tmpIdentical = new ArrayList<>(); // confirm whole group is identical before adding to 'identical' + final Predicate> groupComparer = sourceGroup -> { + assert tmpIdentical.isEmpty() : "not cleaned up: " + tmpIdentical; + boolean groupIdentical = true; + for (StoreFileMetadata sourceFile : sourceGroup) { + final StoreFileMetadata targetFile = targetSnapshot.get(sourceFile.name()); + if (targetFile == null) { + groupIdentical = false; + missing.add(sourceFile); + } else if (groupIdentical && targetFile.isSame(sourceFile)) { + tmpIdentical.add(sourceFile); + } else { + groupIdentical = false; + different.add(sourceFile); + } } - final String segmentId = IndexFileNames.parseSegmentName(meta.name()); - final String extension = IndexFileNames.getExtension(meta.name()); - if (IndexFileNames.SEGMENTS.equals(segmentId) || - DEL_FILE_EXTENSION.equals(extension) || LIV_FILE_EXTENSION.equals(extension)) { - // only treat del files as per-commit files fnm files are generational but only for upgradable DV - perCommitStoreFiles.add(meta); + if (groupIdentical) { + identical.addAll(tmpIdentical); } else { - perSegment.computeIfAbsent(segmentId, k -> new ArrayList<>()).add(meta); + different.addAll(tmpIdentical); } - } - 
final ArrayList identicalFiles = new ArrayList<>(); - for (List segmentFiles : Iterables.concat(perSegment.values(), Collections.singleton(perCommitStoreFiles))) { - identicalFiles.clear(); - boolean consistent = true; - for (StoreFileMetadata meta : segmentFiles) { - StoreFileMetadata storeFileMetadata = recoveryTargetSnapshot.get(meta.name()); - if (storeFileMetadata == null) { - consistent = false; - missing.add(meta); - } else if (storeFileMetadata.isSame(meta) == false) { - consistent = false; - different.add(meta); + tmpIdentical.clear(); + return groupIdentical; + }; + final Consumer> allDifferent = sourceGroup -> { + for (StoreFileMetadata sourceFile : sourceGroup) { + final StoreFileMetadata targetFile = targetSnapshot.get(sourceFile.name()); + if (targetFile == null) { + missing.add(sourceFile); } else { - identicalFiles.add(meta); + different.add(sourceFile); } } - if (consistent) { - identical.addAll(identicalFiles); + }; + + boolean segmentsIdentical = true; + + for (Tuple, List> segmentFiles : perSegmentSourceFiles.values()) { + final List nonGenerationalFiles = segmentFiles.v1(); + final List generationalFiles = segmentFiles.v2(); + + if (groupComparer.test(nonGenerationalFiles)) { + // non-generational files are identical, now check the generational files + segmentsIdentical = groupComparer.test(generationalFiles) && segmentsIdentical; } else { - // make sure all files are added - this can happen if only the deletes are different - different.addAll(identicalFiles); + // non-generational files were different, so consider the whole segment as different + segmentsIdentical = false; + allDifferent.accept(generationalFiles); } } - RecoveryDiff recoveryDiff = new RecoveryDiff(Collections.unmodifiableList(identical), - Collections.unmodifiableList(different), Collections.unmodifiableList(missing)); - assert recoveryDiff.size() == this.metadata.size() - (metadata.containsKey(IndexFileNames.OLD_SEGMENTS_GEN) ? 1 : 0) - : "some files are missing recoveryDiff size: [" + recoveryDiff.size() + "] metadata size: [" + - this.metadata.size() + "] contains segments.gen: [" + metadata.containsKey(IndexFileNames.OLD_SEGMENTS_GEN) + "]"; + + if (segmentsIdentical) { + // segments were the same, check the per-commit files + groupComparer.test(perCommitSourceFiles); + } else { + // at least one segment was different, so treat all the per-commit files as different too + allDifferent.accept(perCommitSourceFiles); + } + + final RecoveryDiff recoveryDiff = new RecoveryDiff( + Collections.unmodifiableList(identical), + Collections.unmodifiableList(different), + Collections.unmodifiableList(missing)); + assert recoveryDiff.size() == metadata.size() : "some files are missing: recoveryDiff is [" + recoveryDiff + + "] comparing: [" + metadata + "] to [" + targetSnapshot.metadata + "]"; return recoveryDiff; } @@ -1099,7 +1129,6 @@ public String toString() { } } - /** * Returns true if the file is auto-generated by the store and shouldn't be deleted during cleanup. 
* This includes write lock and checksum files diff --git a/server/src/test/java/org/elasticsearch/index/store/StoreTests.java b/server/src/test/java/org/elasticsearch/index/store/StoreTests.java index 33d6b7d59a850..f3cb040d29679 100644 --- a/server/src/test/java/org/elasticsearch/index/store/StoreTests.java +++ b/server/src/test/java/org/elasticsearch/index/store/StoreTests.java @@ -20,6 +20,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.SortedDocValuesField; @@ -485,11 +486,16 @@ public void testRecoveryDiff() throws IOException, InterruptedException { List docs = new ArrayList<>(); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - doc.add(new StringField("id", "" + i, random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); - doc.add(new TextField("body", - TestUtil.randomRealisticUnicodeString(random()), random().nextBoolean() ? Field.Store.YES : Field.Store.NO)); - doc.add(new SortedDocValuesField("dv", new BytesRef(TestUtil.randomRealisticUnicodeString(random())))); + final Field.Store stringFieldStored = random().nextBoolean() ? Field.Store.YES : Field.Store.NO; + doc.add(new StringField("id", "" + i, stringFieldStored)); + final String textFieldContent = TestUtil.randomRealisticUnicodeString(random()); + final Field.Store textFieldStored = random().nextBoolean() ? Field.Store.YES : Field.Store.NO; + doc.add(new TextField("body", textFieldContent, textFieldStored)); + final String docValueFieldContent = TestUtil.randomRealisticUnicodeString(random()); + doc.add(new BinaryDocValuesField("dv", new BytesRef(docValueFieldContent))); docs.add(doc); + logger.info("--> doc [{}] id=[{}] (store={}) body=[{}] (store={}) dv=[{}]", + i, i, stringFieldStored, textFieldContent, textFieldStored, docValueFieldContent); } long seed = random().nextLong(); Store.MetadataSnapshot first; @@ -504,9 +510,8 @@ public void testRecoveryDiff() throws IOException, InterruptedException { final boolean lotsOfSegments = rarely(random); for (Document d : docs) { writer.addDocument(d); - if (lotsOfSegments && random.nextBoolean()) { - writer.commit(); - } else if (rarely(random)) { + if (lotsOfSegments && random.nextBoolean() || rarely(random)) { + logger.info("--> commit after doc {}", d.getField("id").stringValue()); writer.commit(); } } @@ -533,9 +538,7 @@ public void testRecoveryDiff() throws IOException, InterruptedException { final boolean lotsOfSegments = rarely(random); for (Document d : docs) { writer.addDocument(d); - if (lotsOfSegments && random.nextBoolean()) { - writer.commit(); - } else if (rarely(random)) { + if (lotsOfSegments && random.nextBoolean() || rarely(random)) { writer.commit(); } } @@ -560,35 +563,40 @@ public void testRecoveryDiff() throws IOException, InterruptedException { assertThat(selfDiff.different, empty()); assertThat(selfDiff.missing, empty()); - - // lets add some deletes - Random random = new Random(seed); - IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec()); - iwc.setMergePolicy(NoMergePolicy.INSTANCE); - iwc.setUseCompoundFile(random.nextBoolean()); - iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); - IndexWriter writer = new IndexWriter(store.directory(), iwc); - writer.deleteDocuments(new Term("id", Integer.toString(random().nextInt(numDocs)))); - writer.commit(); - 
writer.close(); - Store.MetadataSnapshot metadata = store.getMetadata(null); + // delete a doc + final String deleteId = Integer.toString(random().nextInt(numDocs)); + Store.MetadataSnapshot metadata; + { + Random random = new Random(seed); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec()); + iwc.setMergePolicy(NoMergePolicy.INSTANCE); + iwc.setUseCompoundFile(random.nextBoolean()); + iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); + IndexWriter writer = new IndexWriter(store.directory(), iwc); + logger.info("--> delete doc {}", deleteId); + writer.deleteDocuments(new Term("id", deleteId)); + writer.commit(); + writer.close(); + metadata = store.getMetadata(null); + } StoreFileMetadata delFile = null; for (StoreFileMetadata md : metadata) { if (md.name().endsWith(".liv")) { delFile = md; + logger.info("--> delFile=[{}]", delFile); break; } } Store.RecoveryDiff afterDeleteDiff = metadata.recoveryDiff(second); if (delFile != null) { - assertThat(afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file - assertThat(afterDeleteDiff.different.size(), equalTo(0)); - assertThat(afterDeleteDiff.missing.size(), equalTo(2)); + assertThat(afterDeleteDiff.toString(), afterDeleteDiff.identical.size(), equalTo(metadata.size() - 2)); // segments_N + del file + assertThat(afterDeleteDiff.toString(), afterDeleteDiff.different.size(), equalTo(0)); + assertThat(afterDeleteDiff.toString(), afterDeleteDiff.missing.size(), equalTo(2)); } else { // an entire segment must be missing (single doc segment got dropped) - assertThat(afterDeleteDiff.identical.size(), greaterThan(0)); - assertThat(afterDeleteDiff.different.size(), equalTo(0)); - assertThat(afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different + assertThat(afterDeleteDiff.toString(), afterDeleteDiff.identical.size(), greaterThan(0)); + assertThat(afterDeleteDiff.toString(), afterDeleteDiff.different.size(), equalTo(0)); + assertThat(afterDeleteDiff.toString(), afterDeleteDiff.missing.size(), equalTo(1)); // the commit file is different } // check the self diff @@ -598,30 +606,70 @@ public void testRecoveryDiff() throws IOException, InterruptedException { assertThat(selfDiff.missing, empty()); // add a new commit - iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec()); - iwc.setMergePolicy(NoMergePolicy.INSTANCE); - iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files - iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); - writer = new IndexWriter(store.directory(), iwc); - writer.addDocument(docs.get(0)); - writer.close(); + { + Random random = new Random(seed); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec()); + iwc.setMergePolicy(NoMergePolicy.INSTANCE); + iwc.setUseCompoundFile(true); // force CFS - easier to test here since we know it will add 3 files + iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); + IndexWriter writer = new IndexWriter(store.directory(), iwc); + logger.info("--> add new empty doc"); + writer.addDocument(new Document()); + writer.close(); + } Store.MetadataSnapshot newCommitMetadata = store.getMetadata(null); Store.RecoveryDiff newCommitDiff = newCommitMetadata.recoveryDiff(metadata); if (delFile != null) { - assertThat(newCommitDiff.identical.size(), - equalTo(newCommitMetadata.size() - 5)); // segments_N, del file, cfs, cfe, si for the new segment - 
assertThat(newCommitDiff.different.size(), equalTo(1)); // the del file must be different - assertThat(newCommitDiff.different.get(0).name(), endsWith(".liv")); - assertThat(newCommitDiff.missing.size(), equalTo(4)); // segments_N,cfs, cfe, si for the new segment + assertThat(newCommitDiff.toString(), newCommitDiff.identical.size(), + equalTo(newCommitMetadata.size() - 4)); // segments_N, cfs, cfe, si for the new segment + assertThat(newCommitDiff.toString(), newCommitDiff.different.size(), equalTo(0)); // the del file must be different + assertThat(newCommitDiff.toString(), newCommitDiff.missing.size(), equalTo(4)); // segments_N,cfs, cfe, si for the new segment + assertTrue(newCommitDiff.toString(), newCommitDiff.identical.stream().anyMatch(m -> m.name().endsWith(".liv"))); } else { - assertThat(newCommitDiff.identical.size(), + assertThat(newCommitDiff.toString(), newCommitDiff.identical.size(), equalTo(newCommitMetadata.size() - 4)); // segments_N, cfs, cfe, si for the new segment - assertThat(newCommitDiff.different.size(), equalTo(0)); - assertThat(newCommitDiff.missing.size(), + assertThat(newCommitDiff.toString(), newCommitDiff.different.size(), equalTo(0)); + assertThat(newCommitDiff.toString(), newCommitDiff.missing.size(), equalTo(4)); // an entire segment must be missing (single doc segment got dropped) plus the commit is different } + // update doc values + Store.MetadataSnapshot dvUpdateSnapshot; + final String updateId = randomValueOtherThan(deleteId, () -> Integer.toString(random().nextInt(numDocs))); + { + Random random = new Random(seed); + IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random)).setCodec(TestUtil.getDefaultCodec()); + iwc.setMergePolicy(NoMergePolicy.INSTANCE); + iwc.setUseCompoundFile(random.nextBoolean()); + iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND); + IndexWriter writer = new IndexWriter(store.directory(), iwc); + final String newDocValue = TestUtil.randomRealisticUnicodeString(random()); + logger.info("--> update doc [{}] with dv=[{}]", updateId, newDocValue); + writer.updateBinaryDocValue(new Term("id", updateId), "dv", new BytesRef(newDocValue)); + writer.commit(); + writer.close(); + dvUpdateSnapshot = store.getMetadata(null); + } + logger.info("--> source: {}", dvUpdateSnapshot.asMap()); + logger.info("--> target: {}", newCommitMetadata.asMap()); + Store.RecoveryDiff dvUpdateDiff = dvUpdateSnapshot.recoveryDiff(newCommitMetadata); + final int delFileCount; + if (delFile == null || dvUpdateDiff.different.isEmpty()) { + // liv file either doesn't exist or belongs to a different segment from the one that we just updated + delFileCount = 0; + assertThat(dvUpdateDiff.toString(), dvUpdateDiff.different, empty()); + } else { + // liv file is generational and belongs to the updated segment + delFileCount = 1; + assertThat(dvUpdateDiff.toString(), dvUpdateDiff.different.size(), equalTo(1)); + assertThat(dvUpdateDiff.toString(), dvUpdateDiff.different.get(0).name(), endsWith(".liv")); + } + + assertThat(dvUpdateDiff.toString(), dvUpdateDiff.identical.size(), equalTo(dvUpdateSnapshot.size() - 4 - delFileCount)); + assertThat(dvUpdateDiff.toString(), dvUpdateDiff.different.size(), equalTo(delFileCount)); + assertThat(dvUpdateDiff.toString(), dvUpdateDiff.missing.size(), equalTo(4)); // segments_N, fnm, dvd, dvm for the updated segment + deleteContent(store.directory()); IOUtils.close(store); } diff --git a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java 
b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java index 8bc0fc1b67120..de9acf2cf340f 100644 --- a/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java +++ b/x-pack/plugin/searchable-snapshots/src/test/java/org/elasticsearch/xpack/searchablesnapshots/SearchableSnapshotsIntegTests.java @@ -76,8 +76,7 @@ public void testCreateAndRestoreSearchableSnapshot() throws Exception { for (int i = between(10, 10_000); i >= 0; i--) { indexRequestBuilders.add(client().prepareIndex(indexName).setSource("foo", randomBoolean() ? "bar" : "baz")); } - // TODO NORELEASE no dummy docs since that includes deletes, yet we always copy the .liv file in peer recovery - indexRandom(true, false, indexRequestBuilders); + indexRandom(true, true, indexRequestBuilders); refresh(indexName); assertThat( client().admin().indices().prepareForceMerge(indexName).setOnlyExpungeDeletes(true).setFlush(true).get().getFailedShards(), From c159fc8a4dd5b8b0a26a1f86ff1e69bcb5d3cfdf Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 15 Apr 2020 15:41:42 +0100 Subject: [PATCH 02/10] Imports --- server/src/main/java/org/elasticsearch/index/store/Store.java | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index de7c1d17f36cb..25971b40eb53a 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -34,7 +34,6 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.SegmentCommitInfo; -import org.apache.lucene.index.SegmentInfo; import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.BufferedChecksum; From 85e85ade509752fea644dba527487e85c09a058b Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 13 May 2020 13:09:15 +0100 Subject: [PATCH 03/10] WIP add support for writer-assigned UUIDs introduced in Lucene 8.6 --- .../BlobStoreIndexShardSnapshot.java | 21 +++++- .../org/elasticsearch/index/store/Store.java | 21 ++++-- .../index/store/StoreFileMetadata.java | 73 ++++++++++++++++++- .../snapshots/blobstore/FileInfoTests.java | 23 +++--- 4 files changed, 114 insertions(+), 24 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java index ccb44cd9550ee..a4d93e328a89c 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java @@ -36,6 +36,8 @@ import java.util.Collections; import java.util.List; +import static org.elasticsearch.index.store.StoreFileMetadata.UNAVAILABLE_WRITER_UUID; + /** * Shard snapshot metadata */ @@ -231,6 +233,7 @@ public boolean isSame(FileInfo fileInfo) { static final String PART_SIZE = "part_size"; static final String WRITTEN_BY = "written_by"; static final String META_HASH = "meta_hash"; + static final String WRITER_UUID = "writer_uuid"; /** * Serializes file info into JSON @@ -252,10 +255,16 @@ public static void toXContent(FileInfo file, XContentBuilder builder) throws IOE 
builder.field(WRITTEN_BY, file.metadata.writtenBy()); } - if (file.metadata.hash() != null && file.metadata().hash().length > 0) { - BytesRef br = file.metadata.hash(); - builder.field(META_HASH, br.bytes, br.offset, br.length); + final BytesRef hash = file.metadata.hash(); + if (hash != null && hash.length > 0) { + builder.field(META_HASH, hash.bytes, hash.offset, hash.length); + } + + final BytesRef writerUuid = file.metadata.writerUuid(); + if (writerUuid.length > 0) { + builder.field(WRITER_UUID, writerUuid.bytes, writerUuid.offset, writerUuid.length); } + builder.endObject(); } @@ -275,6 +284,7 @@ public static FileInfo fromXContent(XContentParser parser) throws IOException { Version writtenBy = null; String writtenByStr = null; BytesRef metaHash = new BytesRef(); + BytesRef writerUuid = UNAVAILABLE_WRITER_UUID; if (token == XContentParser.Token.START_OBJECT) { while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { @@ -298,6 +308,9 @@ public static FileInfo fromXContent(XContentParser parser) throws IOException { metaHash.bytes = parser.binaryValue(); metaHash.offset = 0; metaHash.length = metaHash.bytes.length; + } else if (WRITER_UUID.equals(currentFieldName)) { + writerUuid = new BytesRef(parser.binaryValue()); + assert writerUuid.length > 0; } else { throw new ElasticsearchParseException("unknown parameter [{}]", currentFieldName); } @@ -322,7 +335,7 @@ public static FileInfo fromXContent(XContentParser parser) throws IOException { } else if (checksum == null) { throw new ElasticsearchParseException("missing checksum for name [" + name + "]"); } - return new FileInfo(name, new StoreFileMetadata(physicalName, length, checksum, writtenBy, metaHash), partSize); + return new FileInfo(name, new StoreFileMetadata(physicalName, length, checksum, writtenBy, metaHash, writerUuid), partSize); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index 1a19a6feaefe5..28e6b03cbbfd7 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -138,7 +138,7 @@ public class Store extends AbstractIndexShardComponent implements Closeable, Ref /** * Specific {@link IOContext} used to verify Lucene files footer checksums. - * See {@link MetadataSnapshot#checksumFromLuceneFile(Directory, String, Map, Logger, Version, boolean)} + * See {@link MetadataSnapshot#checksumFromLuceneFile(Directory, String, Map, Logger, Version, boolean, BytesRef)} */ public static final IOContext READONCE_CHECKSUM = new IOContext(IOContext.READONCE.context); @@ -828,16 +828,22 @@ static LoadedMetadata loadMetadata(IndexCommit commit, Directory directory, Logg if (version.onOrAfter(maxVersion)) { maxVersion = version; } + + final BytesRef segmentInfoId = StoreFileMetadata.toWriterUuid(info.info.getId()); + final BytesRef segmentCommitInfoId = StoreFileMetadata.toWriterUuid(info.getId()); + for (String file : info.files()) { checksumFromLuceneFile(directory, file, builder, logger, version, - SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file))); + SEGMENT_INFO_EXTENSION.equals(IndexFileNames.getExtension(file)), + IndexFileNames.parseGeneration(file) == 0 ? 
segmentInfoId : segmentCommitInfoId); } } if (maxVersion == null) { maxVersion = org.elasticsearch.Version.CURRENT.minimumIndexCompatibilityVersion().luceneVersion; } final String segmentsFile = segmentCommitInfos.getSegmentsFileName(); - checksumFromLuceneFile(directory, segmentsFile, builder, logger, maxVersion, true); + checksumFromLuceneFile(directory, segmentsFile, builder, logger, maxVersion, true, + StoreFileMetadata.toWriterUuid(segmentCommitInfos.getId())); } catch (CorruptIndexException | IndexNotFoundException | IndexFormatTooOldException | IndexFormatTooNewException ex) { // we either know the index is corrupted or it's just not there throw ex; @@ -863,7 +869,7 @@ static LoadedMetadata loadMetadata(IndexCommit commit, Directory directory, Logg } private static void checksumFromLuceneFile(Directory directory, String file, Map builder, - Logger logger, Version version, boolean readFileAsHash) throws IOException { + Logger logger, Version version, boolean readFileAsHash, BytesRef writerUuid) throws IOException { final String checksum; final BytesRefBuilder fileHash = new BytesRefBuilder(); try (IndexInput in = directory.openInput(file, READONCE_CHECKSUM)) { @@ -888,7 +894,7 @@ private static void checksumFromLuceneFile(Directory directory, String file, Map logger.debug(() -> new ParameterizedMessage("Can retrieve checksum from file [{}]", file), ex); throw ex; } - builder.put(file, new StoreFileMetadata(file, length, checksum, version, fileHash.get())); + builder.put(file, new StoreFileMetadata(file, length, checksum, version, fileHash.get(), writerUuid)); } } @@ -928,8 +934,9 @@ public Map asMap() { *

     * <li>missing: files that exist in the source but not in the target</li>
     * </ul>
     *
-     * Individual files are compared by name, length and checksum. The segment info ({@code *.si}) files and the segments file
-     * ({@code segments_N}) are also checked to be a byte-for-byte match.
+     * Individual files are compared by name, length, checksum and (if present) a UUID that was assigned when the file was originally
+     * written. The segment info ({@code *.si}) files and the segments file ({@code segments_N}) are also checked to be a byte-for-byte
+     * match.
      * <p>
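For illustration, a minimal sketch of the comparison rule just described: when both sides carry a writer UUID it must match exactly, and the length/checksum/hash comparison still applies. FileMeta is a hypothetical stand-in for StoreFileMetadata, not the real class; the actual isSame logic appears later in this patch.

import java.util.Arrays;

public class FileIdentitySketch {
    // Hypothetical stand-in for the fields that StoreFileMetadata compares.
    record FileMeta(long length, String checksum, byte[] hash, byte[] writerUuid) {}

    static boolean isSame(FileMeta a, FileMeta b) {
        if (a.writerUuid().length > 0 && b.writerUuid().length > 0 && Arrays.equals(a.writerUuid(), b.writerUuid()) == false) {
            // both files carry the ID assigned by the IndexWriter that wrote them, so a mismatch means different files
            return false;
        }
        // otherwise fall back to (or additionally require) identical length, checksum and content hash
        return a.length() == b.length() && a.checksum().equals(b.checksum()) && Arrays.equals(a.hash(), b.hash());
    }

    public static void main(String[] args) {
        FileMeta a = new FileMeta(100, "abc123", new byte[0], new byte[] { 1, 2 });
        FileMeta b = new FileMeta(100, "abc123", new byte[0], new byte[] { 9, 9 });
        System.out.println(isSame(a, b)); // false: same length and checksum, but written by different writers
    }
}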
    * Files are collected together into a group for each segment plus one group of "per-commit" ({@code segments_N}) files. Each * per-segment group is subdivided into a nongenerational group (most of them) and a generational group (e.g. {@code *.liv}, diff --git a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java index b9de3090de503..140c05ab44db3 100644 --- a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java +++ b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java @@ -20,8 +20,13 @@ package org.elasticsearch.index.store; import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.SegmentCommitInfo; +import org.apache.lucene.index.SegmentInfo; +import org.apache.lucene.index.SegmentInfos; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; +import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Writeable; @@ -33,6 +38,9 @@ public class StoreFileMetadata implements Writeable { + public static final BytesRef UNAVAILABLE_WRITER_UUID = new BytesRef(); + private static final org.elasticsearch.Version WRITER_UUID_MIN_VERSION = org.elasticsearch.Version.V_8_0_0; + private final String name; // the actual file size on "disk", if compressed, the compressed size @@ -44,16 +52,21 @@ public class StoreFileMetadata implements Writeable { private final BytesRef hash; + private final BytesRef writerUuid; + public StoreFileMetadata(String name, long length, String checksum, Version writtenBy) { - this(name, length, checksum, writtenBy, null); + this(name, length, checksum, writtenBy, null, UNAVAILABLE_WRITER_UUID); } - public StoreFileMetadata(String name, long length, String checksum, Version writtenBy, BytesRef hash) { + public StoreFileMetadata(String name, long length, String checksum, Version writtenBy, BytesRef hash, BytesRef writerUuid) { this.name = Objects.requireNonNull(name, "name must not be null"); this.length = length; this.checksum = Objects.requireNonNull(checksum, "checksum must not be null"); this.writtenBy = Objects.requireNonNull(writtenBy, "writtenBy must not be null"); this.hash = hash == null ? new BytesRef() : hash; + + assert writerUuid != null && (writerUuid.length > 0 || writerUuid == UNAVAILABLE_WRITER_UUID); + this.writerUuid = Objects.requireNonNull(writerUuid, "writerUuid must not be null"); } /** @@ -69,6 +82,12 @@ public StoreFileMetadata(StreamInput in) throws IOException { throw new AssertionError(e); } hash = in.readBytesRef(); + if (in.getVersion().onOrAfter(WRITER_UUID_MIN_VERSION)) { + writerUuid = StoreFileMetadata.toWriterUuid(in.readBytesRef()); + } else { + writerUuid = UNAVAILABLE_WRITER_UUID; + } + } @Override @@ -78,6 +97,9 @@ public void writeTo(StreamOutput out) throws IOException { out.writeString(checksum); out.writeString(writtenBy.toString()); out.writeBytesRef(hash); + if (out.getVersion().onOrAfter(WRITER_UUID_MIN_VERSION)) { + out.writeBytesRef(writerUuid); + } } /** @@ -133,6 +155,16 @@ public boolean isSame(StoreFileMetadata other) { // we can't tell if either or is null so we return false in this case! this is why we don't use equals for this! 
return false; } + if (writerUuid.length > 0 && other.writerUuid.length > 0) { + // if the writer ID is missing on one of the files then we ignore this field and just rely on the checksum and hash, but if + // it's present on both files then it must be identical + if (writerUuid.equals(other.writerUuid) == false) { + return false; + } else { + assert name.equals(other.name) && length == other.length && checksum.equals(other.checksum) : this + " vs " + other; + assert hash.equals(other.hash) : this + " vs " + other + " with hashes " + hash + " vs " + other.hash; + } + } return length == other.length && checksum.equals(other.checksum) && hash.equals(other.hash); } @@ -155,4 +187,41 @@ public Version writtenBy() { public BytesRef hash() { return hash; } + + /** + * Returns the globally-unique ID that was assigned by the {@link IndexWriter} that originally wrote this file: + * + * - For `segments_N` files this is {@link SegmentInfos#getId()} which uniquely identifies the commit. + * - For non-generational segment files this is {@link SegmentInfo#getId()} which uniquely identifies the segment. + * - For generational segment files (i.e. updated docvalues, liv files etc) this is {@link SegmentCommitInfo#getId()} + * which uniquely identifies the generation of the segment. + * + * This ID may be {@link StoreFileMetadata#UNAVAILABLE_WRITER_UUID} (i.e. zero-length) if unavilable, e.g.: + * + * - The file was written by a version of Lucene prior to {@link org.apache.lucene.util.Version#LUCENE_8_6_0}. + * - The metadata came from a version of Elasticsearch prior to {@link StoreFileMetadata#WRITER_UUID_MIN_VERSION}). + * - The file is not one of the files listed above. + * + */ + public BytesRef writerUuid() { + return writerUuid; + } + + static BytesRef toWriterUuid(BytesRef bytesRef) { + if (bytesRef.length == 0) { + return UNAVAILABLE_WRITER_UUID; + } else { + return bytesRef; + } + } + + static BytesRef toWriterUuid(@Nullable byte[] id) { + if (id == null) { + return UNAVAILABLE_WRITER_UUID; + } else { + assert id.length > 0; + return new BytesRef(id); + } + } + } diff --git a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java index 0a4ce23e34895..aabbbafc16139 100644 --- a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java +++ b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java @@ -45,13 +45,10 @@ public class FileInfoTests extends ESTestCase { public void testToFromXContent() throws IOException { final int iters = scaledRandomIntBetween(1, 10); for (int iter = 0; iter < iters; iter++) { - final BytesRef hash = new BytesRef(scaledRandomIntBetween(0, 1024 * 1024)); - hash.length = hash.bytes.length; - for (int i = 0; i < hash.length; i++) { - hash.bytes[i] = randomByte(); - } + final BytesRef hash = randomBytesRef(1024 * 1024); + final BytesRef writerUuid = randomBytesRef(20); StoreFileMetadata meta = new StoreFileMetadata("foobar", Math.abs(randomLong()), randomAlphaOfLengthBetween(1, 10), - Version.LATEST, hash); + Version.LATEST, hash, writerUuid); ByteSizeValue size = new ByteSizeValue(Math.abs(randomLong())); BlobStoreIndexShardSnapshot.FileInfo info = new BlobStoreIndexShardSnapshot.FileInfo("_foobar", meta, size); XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON).prettyPrint(); @@ -75,14 +72,18 @@ public void testToFromXContent() throws IOException { } } + private static BytesRef 
randomBytesRef(int maxSize) { + final BytesRef hash = new BytesRef(scaledRandomIntBetween(0, maxSize)); + hash.length = hash.bytes.length; + for (int i = 0; i < hash.length; i++) { + hash.bytes[i] = randomByte(); + } + return hash; + } + public void testInvalidFieldsInFromXContent() throws IOException { final int iters = scaledRandomIntBetween(1, 10); for (int iter = 0; iter < iters; iter++) { - final BytesRef hash = new BytesRef(scaledRandomIntBetween(0, 1024 * 1024)); - hash.length = hash.bytes.length; - for (int i = 0; i < hash.length; i++) { - hash.bytes[i] = randomByte(); - } String name = "foobar"; String physicalName = "_foobar"; String failure = null; From 0edde82e60a4b2ccf751fe5780791692a4075592 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Tue, 14 Sep 2021 14:54:31 +0200 Subject: [PATCH 04/10] Remove dated comment --- .../main/java/org/elasticsearch/index/store/Store.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index c1ae332ce776c..56315cf71d254 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -188,7 +188,10 @@ private static SegmentInfos readSegmentsInfo(IndexCommit commit, Directory direc try { return commit == null ? Lucene.readSegmentInfos(directory) : Lucene.readSegmentInfos(commit); } catch (EOFException eof) { - // TODO this should be caught by lucene - EOF is almost certainly an index corruption + // + // + // + // this should be caught by lucene - EOF is almost certainly an index corruption throw new CorruptIndexException("Read past EOF while reading segment infos", "commit(" + commit + ")", eof); } catch (IOException exception) { throw exception; // IOExceptions like too many open files are not necessarily a corruption - just bubble it up @@ -954,11 +957,6 @@ public Map asMap() { * generational files are considered to be different and will be recovered in full, but the nongenerational files are left alone. * Finally, if any file is different then all the per-commit files are recovered too. */ - /* Future work: the {@code *.si} file includes {@link SegmentInfo#getId()} which is a globally unique identifier for the - * nongenerational files in the segment so we could compare that instead of using the files lengths and checksums. We may also get a - * similar ID for the generational files in https://issues.apache.org/jira/browse/LUCENE-9324. - * TODO follow up once this Lucene discussion closes. 
- */ public RecoveryDiff recoveryDiff(final MetadataSnapshot targetSnapshot) { final List perCommitSourceFiles = new ArrayList<>(); final Map, List>> perSegmentSourceFiles = new HashMap<>(); From 968a8d536b1a0851b441b1883f746deede580f04 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Tue, 14 Sep 2021 15:40:30 +0200 Subject: [PATCH 05/10] Fix SnapshotsRecoveryPlannerServiceTests --- .../recovery/plan/SnapshotsRecoveryPlannerServiceTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/test/java/org/elasticsearch/indices/recovery/plan/SnapshotsRecoveryPlannerServiceTests.java b/server/src/test/java/org/elasticsearch/indices/recovery/plan/SnapshotsRecoveryPlannerServiceTests.java index 6d00b3a931ef1..077cf634ab46c 100644 --- a/server/src/test/java/org/elasticsearch/indices/recovery/plan/SnapshotsRecoveryPlannerServiceTests.java +++ b/server/src/test/java/org/elasticsearch/indices/recovery/plan/SnapshotsRecoveryPlannerServiceTests.java @@ -471,7 +471,7 @@ private void assertUsesExpectedSnapshot(ShardRecoveryPlan shardRecoveryPlan, // StoreFileMetadata doesn't implement #equals, we rely on StoreFileMetadata#isSame for equality checks private boolean containsFile(List files, StoreFileMetadata fileMetadata) { for (StoreFileMetadata file : files) { - if (file.isSame(fileMetadata)) { + if (fileMetadata.name().equals(file.name()) && file.isSame(fileMetadata)) { return true; } } From da417cea25640c0d5ec2affb387698980997eb56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Thu, 23 Sep 2021 17:17:48 +0200 Subject: [PATCH 06/10] Compare contents directly when possible --- .../org/elasticsearch/index/store/StoreFileMetadata.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java index 7321b5481ef53..245c251a5455c 100644 --- a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java +++ b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java @@ -141,6 +141,11 @@ public boolean isSame(StoreFileMetadata other) { // we can't tell if either or is null so we return false in this case! this is why we don't use equals for this! 
return false; } + + if (hashEqualsContents()) { + return hash.equals(other.hash); + } + if (writerUuid.length > 0 && other.writerUuid.length > 0) { // if the writer ID is missing on one of the files then we ignore this field and just rely on the checksum and hash, but if // it's present on both files then it must be identical From 7f2b8bcb7384053d3c08843877078a1e0152521d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?= Date: Tue, 28 Sep 2021 17:41:09 +0200 Subject: [PATCH 07/10] Take into account version for FileInfo serialization --- .../BlobStoreIndexShardSnapshot.java | 10 ++++-- .../BlobStoreIndexShardSnapshots.java | 2 +- .../org/elasticsearch/index/store/Store.java | 5 +-- .../index/store/StoreFileMetadata.java | 3 ++ .../blobstore/BlobStoreRepository.java | 33 +++++++++++++---- .../blobstore/ChecksumBlobStoreFormat.java | 35 +++++++++++++++++-- .../snapshots/SnapshotsService.java | 7 ++++ .../snapshots/blobstore/FileInfoTests.java | 16 +++++++-- 8 files changed, 93 insertions(+), 18 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java index d2724f0e7117f..391384d1a6132 100644 --- a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshot.java @@ -38,6 +38,7 @@ public class BlobStoreIndexShardSnapshot implements ToXContentFragment { * Information about snapshotted file */ public static class FileInfo implements Writeable { + public static final String SERIALIZE_WRITER_UUID = "serialize_writer_uuid"; private final String name; private final ByteSizeValue partSize; @@ -248,7 +249,7 @@ public boolean isSame(FileInfo fileInfo) { * @param file file info * @param builder XContent builder */ - public static void toXContent(FileInfo file, XContentBuilder builder) throws IOException { + public static void toXContent(FileInfo file, XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.field(NAME, file.name); builder.field(PHYSICAL_NAME, file.metadata.name()); @@ -268,7 +269,10 @@ public static void toXContent(FileInfo file, XContentBuilder builder) throws IOE } final BytesRef writerUuid = file.metadata.writerUuid(); - if (writerUuid.length > 0) { + // We serialize by default when SERIALIZE_WRITER_UUID is not present since in deletes/clones + // we read the serialized files from the blob store and we enforce the version invariants when + // the snapshot was done + if (writerUuid.length > 0 && params.paramAsBoolean(SERIALIZE_WRITER_UUID, true)) { builder.field(WRITER_UUID, writerUuid.bytes, writerUuid.offset, writerUuid.length); } @@ -516,7 +520,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(INCREMENTAL_SIZE, incrementalSize); builder.startArray(FILES); for (FileInfo fileInfo : indexFiles) { - FileInfo.toXContent(fileInfo, builder); + FileInfo.toXContent(fileInfo, builder, params); } builder.endArray(); return builder; diff --git a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java index 503128b26b8f7..2c3fb3143e524 100644 --- 
a/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java +++ b/server/src/main/java/org/elasticsearch/index/snapshots/blobstore/BlobStoreIndexShardSnapshots.java @@ -208,7 +208,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws // First we list all blobs with their file infos: builder.startArray(Fields.FILES); for (Map.Entry entry : files.entrySet()) { - FileInfo.toXContent(entry.getValue(), builder); + FileInfo.toXContent(entry.getValue(), builder, params); } builder.endArray(); // Then we list all snapshots with list of all blobs that are used by the snapshot diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java index e9fcb6e19d438..44c0aa8be1064 100644 --- a/server/src/main/java/org/elasticsearch/index/store/Store.java +++ b/server/src/main/java/org/elasticsearch/index/store/Store.java @@ -189,10 +189,7 @@ private static SegmentInfos readSegmentsInfo(IndexCommit commit, Directory direc try { return commit == null ? Lucene.readSegmentInfos(directory) : Lucene.readSegmentInfos(commit); } catch (EOFException eof) { - // - // - // - // this should be caught by lucene - EOF is almost certainly an index corruption + // TODO this should be caught by lucene - EOF is almost certainly an index corruption throw new CorruptIndexException("Read past EOF while reading segment infos", "commit(" + commit + ")", eof); } catch (IOException exception) { throw exception; // IOExceptions like too many open files are not necessarily a corruption - just bubble it up diff --git a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java index 245c251a5455c..563541fe13536 100644 --- a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java +++ b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java @@ -142,6 +142,9 @@ public boolean isSame(StoreFileMetadata other) { return false; } + // If we have the file contents, we directly compare the contents. This is useful to compare segment info + // files of source-only snapshots where the original segment info file shares the same id as the source-only + // segment info file but its contents are different. 
if (hashEqualsContents()) { return hash.equals(other.hash); } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index a4ea5a316dc3e..344119c8023c3 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -2734,6 +2734,7 @@ public void snapshotShard(SnapshotShardContext context) { final ShardGeneration indexGeneration; final boolean writeShardGens = SnapshotsService.useShardGenerations(context.getRepositoryMetaVersion()); + final boolean writeFileInfoWriterUUID = SnapshotsService.includeFileInfoWriterUUID(context.getRepositoryMetaVersion()); // build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones List newSnapshotsList = new ArrayList<>(); newSnapshotsList.add(new SnapshotFiles(snapshotId.getName(), indexCommitPointFiles, context.stateIdentifier())); @@ -2748,11 +2749,16 @@ public void snapshotShard(SnapshotShardContext context) { // reference a generation that has not had all its files fully upload. indexGeneration = ShardGeneration.newGeneration(); try { + final Map serializationParams = Collections.singletonMap( + BlobStoreIndexShardSnapshot.FileInfo.SERIALIZE_WRITER_UUID, + Boolean.toString(writeFileInfoWriterUUID) + ); INDEX_SHARD_SNAPSHOTS_FORMAT.write( updatedBlobStoreIndexShardSnapshots, shardContainer, indexGeneration.toBlobNamePart(), - compress + compress, + serializationParams ); } catch (IOException e) { throw new IndexShardSnapshotFailedException( @@ -2786,7 +2792,11 @@ public void snapshotShard(SnapshotShardContext context) { + blobsToDelete; afterWriteSnapBlob = () -> { try { - writeShardIndexBlobAtomic(shardContainer, newGen, updatedBlobStoreIndexShardSnapshots); + final Map serializationParams = Collections.singletonMap( + BlobStoreIndexShardSnapshot.FileInfo.SERIALIZE_WRITER_UUID, + Boolean.toString(writeFileInfoWriterUUID) + ); + writeShardIndexBlobAtomic(shardContainer, newGen, updatedBlobStoreIndexShardSnapshots, serializationParams); } catch (IOException e) { throw new IndexShardSnapshotFailedException( shardId, @@ -2830,7 +2840,17 @@ public void snapshotShard(SnapshotShardContext context) { ); try { final String snapshotUUID = snapshotId.getUUID(); - INDEX_SHARD_SNAPSHOT_FORMAT.write(blobStoreIndexShardSnapshot, shardContainer, snapshotUUID, compress); + final Map serializationParams = Collections.singletonMap( + BlobStoreIndexShardSnapshot.FileInfo.SERIALIZE_WRITER_UUID, + Boolean.toString(writeFileInfoWriterUUID) + ); + INDEX_SHARD_SNAPSHOT_FORMAT.write( + blobStoreIndexShardSnapshot, + shardContainer, + snapshotUUID, + compress, + serializationParams + ); } catch (IOException e) { throw new IndexShardSnapshotFailedException(shardId, "Failed to write commit point", e); } @@ -3232,7 +3252,7 @@ private ShardSnapshotMetaDeleteResult deleteFromShardSnapshotMeta( INDEX_SHARD_SNAPSHOTS_FORMAT.write(updatedSnapshots, shardContainer, writtenGeneration.toBlobNamePart(), compress); } else { writtenGeneration = new ShardGeneration(indexGeneration); - writeShardIndexBlobAtomic(shardContainer, indexGeneration, updatedSnapshots); + writeShardIndexBlobAtomic(shardContainer, indexGeneration, updatedSnapshots, Collections.emptyMap()); } final Set survivingSnapshotUUIDs = survivingSnapshots.stream().map(SnapshotId::getUUID).collect(Collectors.toSet()); return new 
ShardSnapshotMetaDeleteResult( @@ -3262,7 +3282,8 @@ private ShardSnapshotMetaDeleteResult deleteFromShardSnapshotMeta( private void writeShardIndexBlobAtomic( BlobContainer shardContainer, long indexGeneration, - BlobStoreIndexShardSnapshots updatedSnapshots + BlobStoreIndexShardSnapshots updatedSnapshots, + Map serializationParams ) throws IOException { assert indexGeneration >= 0 : "Shard generation must not be negative but saw [" + indexGeneration + "]"; logger.trace( @@ -3272,7 +3293,7 @@ private void writeShardIndexBlobAtomic( writeAtomic( shardContainer, blobName, - out -> INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compress, out), + out -> INDEX_SHARD_SNAPSHOTS_FORMAT.serialize(updatedSnapshots, blobName, compress, serializationParams, out), true ); } diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java index 9882cd89caa61..56882819ca8e8 100644 --- a/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java +++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java @@ -38,6 +38,7 @@ import java.io.OutputStream; import java.util.Collections; import java.util.Locale; +import java.util.Map; import java.util.zip.CRC32; /** @@ -271,11 +272,37 @@ private int getAvailable() throws IOException { * @param compress whether to use compression */ public void write(T obj, BlobContainer blobContainer, String name, boolean compress) throws IOException { + write(obj, blobContainer, name, compress, Collections.emptyMap()); + } + + /** + * Writes blob with resolving the blob name using {@link #blobName} method. + *

    + * The blob will optionally by compressed. + * + * @param obj object to be serialized + * @param blobContainer blob container + * @param name blob name + * @param compress whether to use compression + * @param serializationParams extra serialization parameters + */ + public void write(T obj, BlobContainer blobContainer, String name, boolean compress, Map serializationParams) + throws IOException { final String blobName = blobName(name); - blobContainer.writeBlob(blobName, false, false, out -> serialize(obj, blobName, compress, out)); + blobContainer.writeBlob(blobName, false, false, out -> serialize(obj, blobName, compress, serializationParams, out)); } public void serialize(final T obj, final String blobName, final boolean compress, OutputStream outputStream) throws IOException { + serialize(obj, blobName, compress, Collections.emptyMap(), outputStream); + } + + public void serialize( + final T obj, + final String blobName, + final boolean compress, + final Map extraParams, + OutputStream outputStream + ) throws IOException { try ( OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput( "ChecksumBlobStoreFormat.serialize(blob=\"" + blobName + "\")", @@ -297,8 +324,12 @@ public void close() { compress ? CompressorFactory.COMPRESSOR.threadLocalOutputStream(indexOutputOutputStream) : indexOutputOutputStream ) ) { + ToXContent.Params params = extraParams.isEmpty() + ? SNAPSHOT_ONLY_FORMAT_PARAMS + : new ToXContent.DelegatingMapParams(extraParams, SNAPSHOT_ONLY_FORMAT_PARAMS); + builder.startObject(); - obj.toXContent(builder, SNAPSHOT_ONLY_FORMAT_PARAMS); + obj.toXContent(builder, params); builder.endObject(); } CodecUtil.writeFooter(indexOutput); diff --git a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java index db23fea8428db..c3acc298c6ecf 100644 --- a/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java +++ b/server/src/main/java/org/elasticsearch/snapshots/SnapshotsService.java @@ -130,6 +130,9 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus public static final Version UUIDS_IN_REPO_DATA_VERSION = Version.V_7_12_0; + // TODO: Update to 7.16 after backporting + public static final Version FILE_INFO_WRITER_UUIDS_IN_SHARD_DATA_VERSION = Version.CURRENT; + public static final Version OLD_SNAPSHOT_FORMAT = Version.V_7_5_0; public static final String POLICY_ID_METADATA_FIELD = "policy"; @@ -2374,6 +2377,10 @@ public static boolean includesUUIDs(Version repositoryMetaVersion) { return repositoryMetaVersion.onOrAfter(UUIDS_IN_REPO_DATA_VERSION); } + public static boolean includeFileInfoWriterUUID(Version repositoryMetaVersion) { + return repositoryMetaVersion.onOrAfter(FILE_INFO_WRITER_UUIDS_IN_SHARD_DATA_VERSION); + } + /** Deletes snapshot from repository * * @param deleteEntry delete entry in cluster state diff --git a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java index 41533e15ed628..e2844e64b21eb 100644 --- a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java +++ b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java @@ -12,6 +12,7 @@ import org.elasticsearch.ElasticsearchParseException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.common.xcontent.ToXContent; 
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
 import org.elasticsearch.common.xcontent.XContentParser;
@@ -22,7 +23,9 @@
 import org.elasticsearch.test.ESTestCase;
 
 import java.io.IOException;
+import java.util.Collections;
 
+import static org.elasticsearch.index.store.StoreFileMetadata.UNAVAILABLE_WRITER_UUID;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.is;
@@ -51,7 +54,11 @@ public void testToFromXContent() throws IOException {
         ByteSizeValue size = new ByteSizeValue(Math.abs(randomLong()));
         BlobStoreIndexShardSnapshot.FileInfo info = new BlobStoreIndexShardSnapshot.FileInfo("_foobar", meta, size);
         XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON).prettyPrint();
-        BlobStoreIndexShardSnapshot.FileInfo.toXContent(info, builder);
+        boolean serializeWriterUUID = randomBoolean();
+        ToXContent.Params params = new ToXContent.MapParams(
+            Collections.singletonMap(FileInfo.SERIALIZE_WRITER_UUID, Boolean.toString(serializeWriterUUID))
+        );
+        BlobStoreIndexShardSnapshot.FileInfo.toXContent(info, builder, params);
         byte[] xcontent = BytesReference.toBytes(BytesReference.bytes(shuffleXContent(builder)));
 
         final BlobStoreIndexShardSnapshot.FileInfo parsedInfo;
@@ -67,12 +74,17 @@ public void testToFromXContent() throws IOException {
             assertThat(parsedInfo.metadata().hash().length, equalTo(hash.length));
             assertThat(parsedInfo.metadata().hash(), equalTo(hash));
             assertThat(parsedInfo.metadata().writtenBy(), equalTo(Version.LATEST.toString()));
+            if (serializeWriterUUID) {
+                assertThat(parsedInfo.metadata().writerUuid(), equalTo(writerUuid));
+            } else {
+                assertThat(parsedInfo.metadata().writerUuid(), equalTo(UNAVAILABLE_WRITER_UUID));
+            }
             assertThat(parsedInfo.isSame(info.metadata()), is(true));
         }
     }
 
     private static BytesRef randomBytesRef(int maxSize) {
-        final BytesRef hash = new BytesRef(scaledRandomIntBetween(0, maxSize));
+        final BytesRef hash = new BytesRef(scaledRandomIntBetween(1, maxSize));
         hash.length = hash.bytes.length;
         for (int i = 0; i < hash.length; i++) {
             hash.bytes[i] = randomByte();

From 3148a53567c9b43a957c108e9a66b4ce27306e7d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?=
Date: Mon, 4 Oct 2021 17:43:42 +0200
Subject: [PATCH 08/10] Review comments

---
 .../java/org/elasticsearch/index/store/Store.java    |  4 ++--
 .../elasticsearch/index/store/StoreFileMetadata.java |  1 -
 .../blobstore/ChecksumBlobStoreFormat.java           |  8 ++++----
 .../index/snapshots/blobstore/FileInfoTests.java     | 12 +++++++++---
 .../org/elasticsearch/index/store/StoreTests.java    | 12 ++++++------
 5 files changed, 21 insertions(+), 16 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/store/Store.java b/server/src/main/java/org/elasticsearch/index/store/Store.java
index 44c0aa8be1064..50e252e99c04c 100644
--- a/server/src/main/java/org/elasticsearch/index/store/Store.java
+++ b/server/src/main/java/org/elasticsearch/index/store/Store.java
@@ -962,10 +962,10 @@ public RecoveryDiff recoveryDiff(final MetadataSnapshot targetSnapshot) {
             for (StoreFileMetadata sourceFile : this) {
                 if (sourceFile.name().startsWith("_")) {
                     final String segmentId = IndexFileNames.parseSegmentName(sourceFile.name());
-                    final long generation = IndexFileNames.parseGeneration(sourceFile.name());
+                    final boolean isGenerationalFile = IndexFileNames.parseGeneration(sourceFile.name()) > 0L;
                     final Tuple<List<StoreFileMetadata>, List<StoreFileMetadata>> perSegmentTuple = perSegmentSourceFiles
                         .computeIfAbsent(segmentId, k -> Tuple.tuple(new ArrayList<>(), new ArrayList<>()));
-                    (generation == 0 ? perSegmentTuple.v1() : perSegmentTuple.v2()).add(sourceFile);
+                    (isGenerationalFile ? perSegmentTuple.v2() : perSegmentTuple.v1()).add(sourceFile);
                 } else {
                     assert sourceFile.name().startsWith(IndexFileNames.SEGMENTS + "_") : "unexpected " + sourceFile;
                     perCommitSourceFiles.add(sourceFile);
diff --git a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
index 563541fe13536..a571391e7a1e4 100644
--- a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
+++ b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
@@ -73,7 +73,6 @@ public StoreFileMetadata(StreamInput in) throws IOException {
         } else {
             writerUuid = UNAVAILABLE_WRITER_UUID;
         }
-
     }
 
     @Override
diff --git a/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java b/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java
index 56882819ca8e8..79a83205cbac9 100644
--- a/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java
+++ b/server/src/main/java/org/elasticsearch/repositories/blobstore/ChecksumBlobStoreFormat.java
@@ -264,7 +264,7 @@ private int getAvailable() throws IOException {
     /**
      * Writes blob with resolving the blob name using {@link #blobName} method.
      * <p>
-     * The blob will optionally by compressed.
+     * The blob will optionally be compressed.
      *
      * @param obj           object to be serialized
     * @param blobContainer blob container
      * @param name          blob name
      * @param compress      whether to use compression
@@ -278,7 +278,7 @@ public void write(T obj, BlobContainer blobContainer, String name, boolean compr
     /**
      * Writes blob with resolving the blob name using {@link #blobName} method.
      * <p>
-     * The blob will optionally by compressed.
+     * The blob will optionally be compressed.
      *
      * @param obj                 object to be serialized
      * @param blobContainer       blob container
      * @param name                blob name
      * @param compress            whether to use compression
      * @param serializationParams extra serialization parameters
@@ -292,7 +292,7 @@ public void write(T obj, BlobContainer blobContainer, String name, boolean compr
         blobContainer.writeBlob(blobName, false, false, out -> serialize(obj, blobName, compress, serializationParams, out));
     }
 
-    public void serialize(final T obj, final String blobName, final boolean compress, OutputStream outputStream) throws IOException {
+    public void serialize(final T obj, final String blobName, final boolean compress, final OutputStream outputStream) throws IOException {
         serialize(obj, blobName, compress, Collections.emptyMap(), outputStream);
     }
 
@@ -301,7 +301,7 @@ public void serialize(
         final String blobName,
         final boolean compress,
         final Map<String, String> extraParams,
-        OutputStream outputStream
+        final OutputStream outputStream
     ) throws IOException {
         try (
             OutputStreamIndexOutput indexOutput = new OutputStreamIndexOutput(
diff --git a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java
index e2844e64b21eb..3dc9e6474d2b6 100644
--- a/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java
+++ b/server/src/test/java/org/elasticsearch/index/snapshots/blobstore/FileInfoTests.java
@@ -55,9 +55,15 @@ public void testToFromXContent() throws IOException {
         BlobStoreIndexShardSnapshot.FileInfo info = new BlobStoreIndexShardSnapshot.FileInfo("_foobar", meta, size);
         XContentBuilder builder = XContentFactory.contentBuilder(XContentType.JSON).prettyPrint();
         boolean serializeWriterUUID = randomBoolean();
-        ToXContent.Params params = new ToXContent.MapParams(
-            Collections.singletonMap(FileInfo.SERIALIZE_WRITER_UUID, Boolean.toString(serializeWriterUUID))
-        );
+        final ToXContent.Params params;
+        if (serializeWriterUUID && randomBoolean()) {
+            // We serialize by the writer uuid by default
+            params = new ToXContent.MapParams(Collections.emptyMap());
+        } else {
+            params = new ToXContent.MapParams(
+                Collections.singletonMap(FileInfo.SERIALIZE_WRITER_UUID, Boolean.toString(serializeWriterUUID))
+            );
+        }
         BlobStoreIndexShardSnapshot.FileInfo.toXContent(info, builder, params);
         byte[] xcontent = BytesReference.toBytes(BytesReference.bytes(shuffleXContent(builder)));
 
diff --git a/server/src/test/java/org/elasticsearch/index/store/StoreTests.java b/server/src/test/java/org/elasticsearch/index/store/StoreTests.java
index 0b45a5984ec7a..33ef0007e5423 100644
--- a/server/src/test/java/org/elasticsearch/index/store/StoreTests.java
+++ b/server/src/test/java/org/elasticsearch/index/store/StoreTests.java
@@ -631,12 +631,12 @@ public void testRecoveryDiff() throws IOException, InterruptedException {
             iwc.setMergePolicy(NoMergePolicy.INSTANCE);
             iwc.setUseCompoundFile(random.nextBoolean());
             iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
-            IndexWriter writer = new IndexWriter(store.directory(), iwc);
-            final String newDocValue = TestUtil.randomRealisticUnicodeString(random());
-            logger.info("--> update doc [{}] with dv=[{}]", updateId, newDocValue);
-            writer.updateBinaryDocValue(new Term("id", updateId), "dv", new BytesRef(newDocValue));
-            writer.commit();
-            writer.close();
+            try(IndexWriter writer = new IndexWriter(store.directory(), iwc)) {
+                final String newDocValue = TestUtil.randomRealisticUnicodeString(random());
+                logger.info("--> update doc [{}] with dv=[{}]", updateId, newDocValue);
+                writer.updateBinaryDocValue(new Term("id", updateId), "dv", new BytesRef(newDocValue));
+                writer.commit();
+            }
             dvUpdateSnapshot = store.getMetadata(null);
         }
         logger.info("--> source: {}", dvUpdateSnapshot.asMap());

From c6f7a456e990eef379a2789c965c9b7ad7548cac Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?=
Date: Mon, 4 Oct 2021 18:49:36 +0200
Subject: [PATCH 09/10] Compute hashEqualContents eagerly

---
 .../org/elasticsearch/index/store/StoreFileMetadata.java | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
index a571391e7a1e4..24ec77a787f2e 100644
--- a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
+++ b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
@@ -43,6 +43,8 @@ public class StoreFileMetadata implements Writeable {
 
     private final BytesRef writerUuid;
 
+    private final boolean hashEqualsContents;
+
     public StoreFileMetadata(String name, long length, String checksum, String writtenBy) {
         this(name, length, checksum, writtenBy, null, UNAVAILABLE_WRITER_UUID);
     }
@@ -57,6 +59,7 @@ public StoreFileMetadata(String name, long length, String checksum, String writt
 
         assert writerUuid != null && (writerUuid.length > 0 || writerUuid == UNAVAILABLE_WRITER_UUID);
         this.writerUuid = Objects.requireNonNull(writerUuid, "writerUuid must not be null");
+        this.hashEqualsContents = computeHashEqualContents();
     }
 
     /**
@@ -73,6 +76,7 @@ public StoreFileMetadata(StreamInput in) throws IOException {
         } else {
             writerUuid = UNAVAILABLE_WRITER_UUID;
         }
+        this.hashEqualsContents = computeHashEqualContents();
     }
 
     @Override
@@ -115,6 +119,10 @@ public String checksum() {
      * @return {@code true} iff {@link #hash()} will return the actual file contents
      */
     public boolean hashEqualsContents() {
+        return hashEqualsContents;
+    }
+
+    private boolean computeHashEqualContents() {
         if (hash.length == length) {
             try {
                 final boolean checksumsMatch = Store.digestToString(CodecUtil.retrieveChecksum(

From 8529206f8f5c7e98c4837786ed8209ad70d4099b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Francisco=20Fern=C3=A1ndez=20Casta=C3=B1o?=
Date: Mon, 4 Oct 2021 19:25:35 +0200
Subject: [PATCH 10/10] Revert "Compute hashEqualContents eagerly"

This reverts commit c6f7a456e990eef379a2789c965c9b7ad7548cac.
---
 .../org/elasticsearch/index/store/StoreFileMetadata.java | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
index 24ec77a787f2e..a571391e7a1e4 100644
--- a/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
+++ b/server/src/main/java/org/elasticsearch/index/store/StoreFileMetadata.java
@@ -43,8 +43,6 @@ public class StoreFileMetadata implements Writeable {
 
     private final BytesRef writerUuid;
 
-    private final boolean hashEqualsContents;
-
     public StoreFileMetadata(String name, long length, String checksum, String writtenBy) {
         this(name, length, checksum, writtenBy, null, UNAVAILABLE_WRITER_UUID);
     }
@@ -59,7 +57,6 @@ public StoreFileMetadata(String name, long length, String checksum, String writt
 
         assert writerUuid != null && (writerUuid.length > 0 || writerUuid == UNAVAILABLE_WRITER_UUID);
         this.writerUuid = Objects.requireNonNull(writerUuid, "writerUuid must not be null");
-        this.hashEqualsContents = computeHashEqualContents();
     }
 
     /**
@@ -76,7 +73,6 @@ public StoreFileMetadata(StreamInput in) throws IOException {
         } else {
             writerUuid = UNAVAILABLE_WRITER_UUID;
         }
-        this.hashEqualsContents = computeHashEqualContents();
     }
 
     @Override
@@ -119,10 +115,6 @@ public String checksum() {
      * @return {@code true} iff {@link #hash()} will return the actual file contents
      */
     public boolean hashEqualsContents() {
-        return hashEqualsContents;
-    }
-
-    private boolean computeHashEqualContents() {
         if (hash.length == length) {
             try {
                 final boolean checksumsMatch = Store.digestToString(CodecUtil.retrieveChecksum(