Skip to content

Commit 8794541

Browse files
CR: Javadoc + method on store file metadata
1 parent 61a468e commit 8794541

File tree

2 files changed

+32
-1
lines changed

2 files changed

+32
-1
lines changed

server/src/main/java/org/elasticsearch/index/store/StoreFileMetaData.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,11 +19,13 @@
1919

2020
package org.elasticsearch.index.store;
2121

22+
import org.apache.lucene.codecs.CodecUtil;
2223
import org.apache.lucene.util.BytesRef;
2324
import org.apache.lucene.util.Version;
2425
import org.elasticsearch.common.io.stream.StreamInput;
2526
import org.elasticsearch.common.io.stream.StreamOutput;
2627
import org.elasticsearch.common.io.stream.Writeable;
28+
import org.elasticsearch.common.lucene.store.ByteArrayIndexInput;
2729

2830
import java.io.IOException;
2931
import java.text.ParseException;
@@ -100,6 +102,29 @@ public String checksum() {
100102
return this.checksum;
101103
}
102104

105+
/**
106+
* Checks if the bytes returned by {@link #hash()} are the contents of the file that this instance refers to.
107+
*
108+
* @return {@code true} iff {@link #hash()} will return the actual file contents
109+
*/
110+
public boolean hashEqualsContents() {
111+
if (hash.length == length) {
112+
try {
113+
final boolean checksumsMatch = Store.digestToString(CodecUtil.retrieveChecksum(
114+
new ByteArrayIndexInput("store_file", hash.bytes, hash.offset, hash.length))).equals(checksum);
115+
assert checksumsMatch : "Checksums did not match for [" + this + "] which has a hash of [" + hash + "]";
116+
return checksumsMatch;
117+
} catch (Exception e) {
118+
// Hash didn't contain any bytes that Lucene could extract a checksum from so we can't verify against the checksum of the
119+
// original file. We should never see an exception here because Lucene files are assumed to always contain the checksum
120+
// footer.
121+
assert false : new AssertionError("Saw exception for hash [" + hash + "] but expected it to be Lucene file", e);
122+
return false;
123+
}
124+
}
125+
return false;
126+
}
127+
103128
/**
104129
* Returns <code>true</code> iff the length and the checksums are the same, otherwise <code>false</code>
105130
*/

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,12 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
182182

183183
private static final String UPLOADED_DATA_BLOB_PREFIX = "__";
184184

185+
/**
186+
* Prefix used for the identifiers of data blobs that were not actually written to the repository physically because their contents are
187+
* already stored in the metadata referencing them, i.e. in {@link BlobStoreIndexShardSnapshot} and
188+
* {@link BlobStoreIndexShardSnapshots}. This is the case for files for which {@link StoreFileMetaData#hashEqualsContents()} is
189+
* {@code true}.
190+
*/
185191
private static final String VIRTUAL_DATA_BLOB_PREFIX = "v__";
186192

187193
/**
@@ -1521,7 +1527,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
15211527

15221528
// We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
15231529
// directly in the shard level metadata in this case
1524-
final boolean needsWrite = md.hash().length != md.length();
1530+
final boolean needsWrite = md.hashEqualsContents() == false;
15251531
indexTotalFileCount += md.length();
15261532
indexTotalNumberOfFiles++;
15271533

0 commit comments

Comments
 (0)