 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ConcurrentHashMap;
@@ -1534,8 +1535,8 @@ private void writeAtomic(final String blobName, final BytesReference bytesRef, b
 
     @Override
     public void snapshotShard(Store store, MapperService mapperService, SnapshotId snapshotId, IndexId indexId,
-                              IndexCommit snapshotIndexCommit, IndexShardSnapshotStatus snapshotStatus, Version repositoryMetaVersion,
-                              Map<String, Object> userMetadata, ActionListener<String> listener) {
+                              IndexCommit snapshotIndexCommit, String shardStateIdentifier, IndexShardSnapshotStatus snapshotStatus,
+                              Version repositoryMetaVersion, Map<String, Object> userMetadata, ActionListener<String> listener) {
         final ShardId shardId = store.shardId();
         final long startTime = threadPool.absoluteTimeInMillis();
         try {
@@ -1561,76 +1562,92 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
                 throw new IndexShardSnapshotFailedException(shardId,
                     "Duplicate snapshot name [" + snapshotId.getName() + "] detected, aborting");
             }
-
-            final List<BlobStoreIndexShardSnapshot.FileInfo> indexCommitPointFiles = new ArrayList<>();
-            final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> filesToSnapshot = new LinkedBlockingQueue<>();
-            store.incRef();
-            final Collection<String> fileNames;
-            final Store.MetadataSnapshot metadataFromStore;
-            try {
-                // TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
-                try {
-                    logger.trace(
-                        "[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit);
-                    metadataFromStore = store.getMetadata(snapshotIndexCommit);
-                    fileNames = snapshotIndexCommit.getFileNames();
-                } catch (IOException e) {
-                    throw new IndexShardSnapshotFailedException(shardId, "Failed to get store file metadata", e);
+            // First inspect all known SegmentInfos instances to see if we already have an equivalent commit in the repository
+            final List<BlobStoreIndexShardSnapshot.FileInfo> filesFromSegmentInfos = Optional.ofNullable(shardStateIdentifier).map(id -> {
+                for (SnapshotFiles snapshotFileSet : snapshots.snapshots()) {
+                    if (id.equals(snapshotFileSet.shardStateIdentifier())) {
+                        return snapshotFileSet.indexFiles();
+                    }
                 }
-            } finally {
-                store.decRef();
-            }
+                return null;
+            }).orElse(null);
+
+            final List<BlobStoreIndexShardSnapshot.FileInfo> indexCommitPointFiles;
             int indexIncrementalFileCount = 0;
             int indexTotalNumberOfFiles = 0;
             long indexIncrementalSize = 0;
-            long indexTotalFileCount = 0;
-            for (String fileName : fileNames) {
-                if (snapshotStatus.isAborted()) {
-                    logger.debug("[{}] [{}] Aborted on the file [{}], exiting", shardId, snapshotId, fileName);
-                    throw new IndexShardSnapshotFailedException(shardId, "Aborted");
+            long indexTotalFileSize = 0;
+            final BlockingQueue<BlobStoreIndexShardSnapshot.FileInfo> filesToSnapshot = new LinkedBlockingQueue<>();
+            // If we did not find a set of files that is equal to the current commit we determine the files to upload by comparing files
+            // in the commit with files already in the repository
+            if (filesFromSegmentInfos == null) {
+                indexCommitPointFiles = new ArrayList<>();
+                store.incRef();
+                final Collection<String> fileNames;
+                final Store.MetadataSnapshot metadataFromStore;
+                try {
+                    // TODO apparently we don't use the MetadataSnapshot#.recoveryDiff(...) here but we should
+                    try {
+                        logger.trace(
+                            "[{}] [{}] Loading store metadata using index commit [{}]", shardId, snapshotId, snapshotIndexCommit);
+                        metadataFromStore = store.getMetadata(snapshotIndexCommit);
+                        fileNames = snapshotIndexCommit.getFileNames();
+                    } catch (IOException e) {
+                        throw new IndexShardSnapshotFailedException(shardId, "Failed to get store file metadata", e);
+                    }
+                } finally {
+                    store.decRef();
                 }
+                for (String fileName : fileNames) {
+                    if (snapshotStatus.isAborted()) {
+                        logger.debug("[{}] [{}] Aborted on the file [{}], exiting", shardId, snapshotId, fileName);
+                        throw new IndexShardSnapshotFailedException(shardId, "Aborted");
+                    }
 
-                logger.trace("[{}] [{}] Processing [{}]", shardId, snapshotId, fileName);
-                final StoreFileMetaData md = metadataFromStore.get(fileName);
-                BlobStoreIndexShardSnapshot.FileInfo existingFileInfo = null;
-                List<BlobStoreIndexShardSnapshot.FileInfo> filesInfo = snapshots.findPhysicalIndexFiles(fileName);
-                if (filesInfo != null) {
-                    for (BlobStoreIndexShardSnapshot.FileInfo fileInfo : filesInfo) {
-                        if (fileInfo.isSame(md)) {
-                            // a commit point file with the same name, size and checksum was already copied to repository
-                            // we will reuse it for this snapshot
-                            existingFileInfo = fileInfo;
-                            break;
+                    logger.trace("[{}] [{}] Processing [{}]", shardId, snapshotId, fileName);
+                    final StoreFileMetaData md = metadataFromStore.get(fileName);
+                    BlobStoreIndexShardSnapshot.FileInfo existingFileInfo = null;
+                    List<BlobStoreIndexShardSnapshot.FileInfo> filesInfo = snapshots.findPhysicalIndexFiles(fileName);
+                    if (filesInfo != null) {
+                        for (BlobStoreIndexShardSnapshot.FileInfo fileInfo : filesInfo) {
+                            if (fileInfo.isSame(md)) {
+                                // a commit point file with the same name, size and checksum was already copied to repository
+                                // we will reuse it for this snapshot
+                                existingFileInfo = fileInfo;
+                                break;
+                            }
                         }
                     }
-                }
 
-                // We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
-                // directly in the shard level metadata in this case
-                final boolean needsWrite = md.hashEqualsContents() == false;
-                indexTotalFileCount += md.length();
-                indexTotalNumberOfFiles++;
-
-                if (existingFileInfo == null) {
-                    indexIncrementalFileCount++;
-                    indexIncrementalSize += md.length();
-                    // create a new FileInfo
-                    BlobStoreIndexShardSnapshot.FileInfo snapshotFileInfo =
-                        new BlobStoreIndexShardSnapshot.FileInfo(
-                            (needsWrite ? UPLOADED_DATA_BLOB_PREFIX : VIRTUAL_DATA_BLOB_PREFIX) + UUIDs.randomBase64UUID(),
-                            md, chunkSize());
-                    indexCommitPointFiles.add(snapshotFileInfo);
-                    if (needsWrite) {
-                        filesToSnapshot.add(snapshotFileInfo);
+                    // We can skip writing blobs where the metadata hash is equal to the blob's contents because we store the hash/contents
+                    // directly in the shard level metadata in this case
+                    final boolean needsWrite = md.hashEqualsContents() == false;
+                    indexTotalFileSize += md.length();
+                    indexTotalNumberOfFiles++;
+
+                    if (existingFileInfo == null) {
+                        indexIncrementalFileCount++;
+                        indexIncrementalSize += md.length();
+                        // create a new FileInfo
+                        BlobStoreIndexShardSnapshot.FileInfo snapshotFileInfo =
+                            new BlobStoreIndexShardSnapshot.FileInfo(
+                                (needsWrite ? UPLOADED_DATA_BLOB_PREFIX : VIRTUAL_DATA_BLOB_PREFIX) + UUIDs.randomBase64UUID(),
+                                md, chunkSize());
+                        indexCommitPointFiles.add(snapshotFileInfo);
+                        if (needsWrite) {
+                            filesToSnapshot.add(snapshotFileInfo);
+                        }
+                        assert needsWrite || assertFileContentsMatchHash(snapshotFileInfo, store);
+                    } else {
+                        indexCommitPointFiles.add(existingFileInfo);
                     }
-                    assert needsWrite || assertFileContentsMatchHash(snapshotFileInfo, store);
-                } else {
-                    indexCommitPointFiles.add(existingFileInfo);
                 }
+            } else {
+                indexCommitPointFiles = filesFromSegmentInfos;
             }
 
             snapshotStatus.moveToStarted(startTime, indexIncrementalFileCount,
-                indexTotalNumberOfFiles, indexIncrementalSize, indexTotalFileCount);
+                indexTotalNumberOfFiles, indexIncrementalSize, indexTotalFileSize);
 
             final StepListener<Collection<Void>> allFilesUploadedListener = new StepListener<>();
             allFilesUploadedListener.whenComplete(v -> {
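
The hunk above is the core of the change: before diffing the commit against the repository file-by-file, snapshotShard first checks whether any earlier snapshot recorded the same shard state identifier and, if so, reuses that snapshot's file list wholesale. Below is a minimal, self-contained sketch of that lookup; SnapshotFileSet, FileInfo, and the example identifier strings are hypothetical stand-ins for the real SnapshotFiles and BlobStoreIndexShardSnapshot.FileInfo types, not code from this PR.

import java.util.List;
import java.util.Optional;

final class ShardSnapshotDedupSketch {

    // Hypothetical stand-in for BlobStoreIndexShardSnapshot.FileInfo.
    record FileInfo(String name, long length) {}

    // Hypothetical stand-in for SnapshotFiles: each completed snapshot records its
    // file list plus the identifier of the shard state it was taken from.
    record SnapshotFileSet(String shardStateIdentifier, List<FileInfo> indexFiles) {}

    // Same Optional.ofNullable(...).map(...).orElse(null) shape as the diff: a null
    // identifier (e.g. from an older caller) simply disables the shortcut.
    static List<FileInfo> filesFromSegmentInfos(String shardStateIdentifier,
                                                List<SnapshotFileSet> existingSnapshots) {
        return Optional.ofNullable(shardStateIdentifier).map(id -> {
            for (SnapshotFileSet snapshotFileSet : existingSnapshots) {
                if (id.equals(snapshotFileSet.shardStateIdentifier())) {
                    // An equivalent commit is already in the repository: reuse its files.
                    return snapshotFileSet.indexFiles();
                }
            }
            return null; // no match: caller falls back to the file-by-file comparison
        }).orElse(null);
    }

    public static void main(String[] args) {
        List<SnapshotFileSet> existing = List.of(
            new SnapshotFileSet("state-42", List.of(new FileInfo("_0.cfs", 1024))));
        System.out.println(filesFromSegmentInfos("state-42", existing)); // reused file list
        System.out.println(filesFromSegmentInfos("state-43", existing)); // null -> full diff
    }
}

Note the fallback behavior: a missing identifier costs nothing, it just sends the caller down the pre-existing incremental path shown in the rest of the hunk.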
@@ -1655,7 +1672,7 @@ public void snapshotShard(Store store, MapperService mapperService, SnapshotId s
                 }
                 // build a new BlobStoreIndexShardSnapshot, that includes this one and all the saved ones
                 List<SnapshotFiles> newSnapshotsList = new ArrayList<>();
-                newSnapshotsList.add(new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles()));
+                newSnapshotsList.add(new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles(), shardStateIdentifier));
                 for (SnapshotFiles point : snapshots) {
                     newSnapshotsList.add(point);
                 }
@@ -1742,7 +1759,7 @@ public void restoreShard(Store store, SnapshotId snapshotId, IndexId indexId, Sh
         final BlobContainer container = shardContainer(indexId, snapshotShardId);
         executor.execute(ActionRunnable.wrap(restoreListener, l -> {
             final BlobStoreIndexShardSnapshot snapshot = loadShardSnapshot(container, snapshotId);
-            final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles());
+            final SnapshotFiles snapshotFiles = new SnapshotFiles(snapshot.snapshot(), snapshot.indexFiles(), null);
             new FileRestoreContext(metadata.name(), shardId, snapshotId, recoveryState) {
                 @Override
                 protected void restoreFiles(List<BlobStoreIndexShardSnapshot.FileInfo> filesToRecover, Store store,
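
The last hunk shows the other side of the API change: SnapshotFiles now takes the shard state identifier as a third constructor argument, and restoreShard passes null because the identifier only matters when deciding whether a future snapshot can be deduplicated, never during a restore. A rough sketch of such a null-tolerant carrier follows, with simplified types and a hypothetical name (the real SnapshotFiles holds richer snapshot metadata):

import java.util.List;

// Hypothetical, simplified stand-in for SnapshotFiles: the identifier is optional
// metadata, so callers that do not know it (e.g. restores) pass null.
final class SnapshotFilesSketch {
    private final String snapshotName;
    private final List<String> indexFiles;
    private final String shardStateIdentifier; // null means "unknown / not applicable"

    SnapshotFilesSketch(String snapshotName, List<String> indexFiles, String shardStateIdentifier) {
        this.snapshotName = snapshotName;
        this.indexFiles = indexFiles;
        this.shardStateIdentifier = shardStateIdentifier;
    }

    // Only the snapshot-deduplication path reads this; a stored null can never equal
    // an incoming identifier, so such an entry is simply never reused.
    String shardStateIdentifier() {
        return shardStateIdentifier;
    }

    String snapshotName() { return snapshotName; }

    List<String> indexFiles() { return indexFiles; }

    public static void main(String[] args) {
        SnapshotFilesSketch restored = new SnapshotFilesSketch("snap-1", List.of("_0.cfs"), null);
        System.out.println(restored.shardStateIdentifier()); // null: opts out of deduplication
    }
}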