126126import java .util .Collections ;
127127import java .util .List ;
128128import java .util .Map ;
129+ import java .util .Optional ;
129130import java .util .Set ;
130131import java .util .concurrent .BlockingQueue ;
131132import java .util .concurrent .ConcurrentHashMap ;
@@ -1552,8 +1553,8 @@ private void writeAtomic(final String blobName, final BytesReference bytesRef, b
15521553
15531554 @ Override
15541555 public void snapshotShard (Store store , MapperService mapperService , SnapshotId snapshotId , IndexId indexId ,
1555- IndexCommit snapshotIndexCommit , IndexShardSnapshotStatus snapshotStatus , Version repositoryMetaVersion ,
1556- Map <String , Object > userMetadata , ActionListener <String > listener ) {
1556+ IndexCommit snapshotIndexCommit , String shardStateIdentifier , IndexShardSnapshotStatus snapshotStatus ,
1557+ Version repositoryMetaVersion , Map <String , Object > userMetadata , ActionListener <String > listener ) {
15571558 final ShardId shardId = store .shardId ();
15581559 final long startTime = threadPool .absoluteTimeInMillis ();
15591560 try {
@@ -1579,76 +1580,92 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
15791580 throw new IndexShardSnapshotFailedException (shardId ,
15801581 "Duplicate snapshot name [" + snapshotId .getName () + "] detected, aborting" );
15811582 }
1582-
1583- final List <BlobStoreIndexShardSnapshot .FileInfo > indexCommitPointFiles = new ArrayList <>();
1584- final BlockingQueue <BlobStoreIndexShardSnapshot .FileInfo > filesToSnapshot = new LinkedBlockingQueue <>();
1585- store .incRef ();
1586- final Collection <String > fileNames ;
1587- final Store .MetadataSnapshot metadataFromStore ;
1588- try {
1589- // TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
1590- try {
1591- logger .trace (
1592- "[{}] [{}] Loading store metadata using index commit [{}]" , shardId , snapshotId , snapshotIndexCommit );
1593- metadataFromStore = store .getMetadata (snapshotIndexCommit );
1594- fileNames = snapshotIndexCommit .getFileNames ();
1595- } catch (IOException e ) {
1596- throw new IndexShardSnapshotFailedException (shardId , "Failed to get store file metadata" , e );
1583+ // First inspect all known SegmentInfos instances to see if we already have an equivalent commit in the repository
1584+ final List <BlobStoreIndexShardSnapshot .FileInfo > filesFromSegmentInfos = Optional .ofNullable (shardStateIdentifier ).map (id -> {
1585+ for (SnapshotFiles snapshotFileSet : snapshots .snapshots ()) {
1586+ if (id .equals (snapshotFileSet .shardStateIdentifier ())) {
1587+ return snapshotFileSet .indexFiles ();
1588+ }
15971589 }
1598- } finally {
1599- store .decRef ();
1600- }
1590+ return null ;
1591+ }).orElse (null );
1592+
1593+ final List <BlobStoreIndexShardSnapshot .FileInfo > indexCommitPointFiles ;
16011594 int indexIncrementalFileCount = 0 ;
16021595 int indexTotalNumberOfFiles = 0 ;
16031596 long indexIncrementalSize = 0 ;
1604- long indexTotalFileCount = 0 ;
1605- for (String fileName : fileNames ) {
1606- if (snapshotStatus .isAborted ()) {
1607- logger .debug ("[{}] [{}] Aborted on the file [{}], exiting" , shardId , snapshotId , fileName );
1608- throw new IndexShardSnapshotFailedException (shardId , "Aborted" );
1597+ long indexTotalFileSize = 0 ;
1598+ final BlockingQueue <BlobStoreIndexShardSnapshot .FileInfo > filesToSnapshot = new LinkedBlockingQueue <>();
1599+ // If we did not find a set of files that is equal to the current commit, we determine the files to upload by comparing files
1600+ // in the commit with files already in the repository
1601+ if (filesFromSegmentInfos == null ) {
1602+ indexCommitPointFiles = new ArrayList <>();
1603+ store .incRef ();
1604+ final Collection <String > fileNames ;
1605+ final Store .MetadataSnapshot metadataFromStore ;
1606+ try {
1607+ // TODO apparently we don't use the MetadataSnapshot#recoveryDiff(...) here but we should
1608+ try {
1609+ logger .trace (
1610+ "[{}] [{}] Loading store metadata using index commit [{}]" , shardId , snapshotId , snapshotIndexCommit );
1611+ metadataFromStore = store .getMetadata (snapshotIndexCommit );
1612+ fileNames = snapshotIndexCommit .getFileNames ();
1613+ } catch (IOException e ) {
1614+ throw new IndexShardSnapshotFailedException (shardId , "Failed to get store file metadata" , e );
1615+ }
1616+ } finally {
1617+ store .decRef ();
16091618 }
1619+ for (String fileName : fileNames ) {
1620+ if (snapshotStatus .isAborted ()) {
1621+ logger .debug ("[{}] [{}] Aborted on the file [{}], exiting" , shardId , snapshotId , fileName );
1622+ throw new IndexShardSnapshotFailedException (shardId , "Aborted" );
1623+ }
16101624
1611- logger .trace ("[{}] [{}] Processing [{}]" , shardId , snapshotId , fileName );
1612- final StoreFileMetaData md = metadataFromStore .get (fileName );
1613- BlobStoreIndexShardSnapshot .FileInfo existingFileInfo = null ;
1614- List <BlobStoreIndexShardSnapshot .FileInfo > filesInfo = snapshots .findPhysicalIndexFiles (fileName );
1615- if (filesInfo != null ) {
1616- for (BlobStoreIndexShardSnapshot .FileInfo fileInfo : filesInfo ) {
1617- if (fileInfo .isSame (md )) {
1618- // a commit point file with the same name, size and checksum was already copied to repository
1619- // we will reuse it for this snapshot
1620- existingFileInfo = fileInfo ;
1621- break ;
1625+ logger .trace ("[{}] [{}] Processing [{}]" , shardId , snapshotId , fileName );
1626+ final StoreFileMetaData md = metadataFromStore .get (fileName );
1627+ BlobStoreIndexShardSnapshot .FileInfo existingFileInfo = null ;
1628+ List <BlobStoreIndexShardSnapshot .FileInfo > filesInfo = snapshots .findPhysicalIndexFiles (fileName );
1629+ if (filesInfo != null ) {
1630+ for (BlobStoreIndexShardSnapshot .FileInfo fileInfo : filesInfo ) {
1631+ if (fileInfo .isSame (md )) {
1632+ // a commit point file with the same name, size and checksum was already copied to repository
1633+ // we will reuse it for this snapshot
1634+ existingFileInfo = fileInfo ;
1635+ break ;
1636+ }
16221637 }
16231638 }
1624- }
16251639
1626- // We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
1627- // directly in the shard level metadata in this case
1628- final boolean needsWrite = md .hashEqualsContents () == false ;
1629- indexTotalFileCount += md .length ();
1630- indexTotalNumberOfFiles ++;
1631-
1632- if (existingFileInfo == null ) {
1633- indexIncrementalFileCount ++;
1634- indexIncrementalSize += md .length ();
1635- // create a new FileInfo
1636- BlobStoreIndexShardSnapshot .FileInfo snapshotFileInfo =
1637- new BlobStoreIndexShardSnapshot .FileInfo (
1638- (needsWrite ? UPLOADED_DATA_BLOB_PREFIX : VIRTUAL_DATA_BLOB_PREFIX ) + UUIDs .randomBase64UUID (),
1639- md , chunkSize ());
1640- indexCommitPointFiles .add (snapshotFileInfo );
1641- if (needsWrite ) {
1642- filesToSnapshot .add (snapshotFileInfo );
1640+ // We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
1641+ // directly in the shard level metadata in this case
1642+ final boolean needsWrite = md .hashEqualsContents () == false ;
1643+ indexTotalFileSize += md .length ();
1644+ indexTotalNumberOfFiles ++;
1645+
1646+ if (existingFileInfo == null ) {
1647+ indexIncrementalFileCount ++;
1648+ indexIncrementalSize += md .length ();
1649+ // create a new FileInfo
1650+ BlobStoreIndexShardSnapshot .FileInfo snapshotFileInfo =
1651+ new BlobStoreIndexShardSnapshot .FileInfo (
1652+ (needsWrite ? UPLOADED_DATA_BLOB_PREFIX : VIRTUAL_DATA_BLOB_PREFIX ) + UUIDs .randomBase64UUID (),
1653+ md , chunkSize ());
1654+ indexCommitPointFiles .add (snapshotFileInfo );
1655+ if (needsWrite ) {
1656+ filesToSnapshot .add (snapshotFileInfo );
1657+ }
1658+ assert needsWrite || assertFileContentsMatchHash (snapshotFileInfo , store );
1659+ } else {
1660+ indexCommitPointFiles .add (existingFileInfo );
16431661 }
1644- assert needsWrite || assertFileContentsMatchHash (snapshotFileInfo , store );
1645- } else {
1646- indexCommitPointFiles .add (existingFileInfo );
16471662 }
1663+ } else {
1664+ indexCommitPointFiles = filesFromSegmentInfos ;
16481665 }
16491666
16501667 snapshotStatus .moveToStarted (startTime , indexIncrementalFileCount ,
1651- indexTotalNumberOfFiles , indexIncrementalSize , indexTotalFileCount );
1668+ indexTotalNumberOfFiles , indexIncrementalSize , indexTotalFileSize );
16521669
16531670 final StepListener <Collection <Void >> allFilesUploadedListener = new StepListener <>();
16541671 allFilesUploadedListener .whenComplete (v -> {
@@ -1673,7 +1690,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
16731690 }
16741691 // build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones
16751692 List <SnapshotFiles > newSnapshotsList = new ArrayList <>();
1676- newSnapshotsList .add (new SnapshotFiles (snapshot .snapshot (), snapshot .indexFiles ()));
1693+ newSnapshotsList .add (new SnapshotFiles (snapshot .snapshot (), snapshot .indexFiles (), shardStateIdentifier ));
16771694 for (SnapshotFiles point : snapshots ) {
16781695 newSnapshotsList .add (point );
16791696 }
@@ -1760,7 +1777,7 @@ public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, Sh
17601777 final BlobContainer container = shardContainer (indexId , snapshotShardId );
17611778 executor .execute (ActionRunnable .wrap (restoreListener , l -> {
17621779 final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot (container , snapshotId );
1763- final SnapshotFiles snapshotFiles = new SnapshotFiles (snapshot .snapshot (), snapshot .indexFiles ());
1780+ final SnapshotFiles snapshotFiles = new SnapshotFiles (snapshot .snapshot (), snapshot .indexFiles (), null );
17641781 new FileRestoreContext (metadata .name (), shardId , snapshotId , recoveryState ) {
17651782 @ Override
17661783 protected void restoreFiles (List <BlobStoreIndexShardSnapshot .FileInfo > filesToRecover , Store store ,
0 commit comments