From 43804ae64e66854ebbb35ed108be92fb578cffdf Mon Sep 17 00:00:00 2001 From: Ali Beyad Date: Thu, 23 Feb 2017 12:11:47 -0500 Subject: [PATCH] Prioritize listing index-N blobs over index.latest in reading snapshots There are two ways to determine the latest index-N blob that contains the truth of the contents of the repository: (1) list all index-N blobs and figure out what the latest value of N is, and (2) read the index.latest blob, which contains the latest value of N explicitely. Note that the index.latest blob is not written atomically and can be re-written, as opposed to the index-N blobs which are never re-written (to create an updated index blob, index-{N+1} is written). Previously, the latest index-N was determined by first trying to read the index.latest blob and if that blob was missing (it was deleted before being re-written and in between deleting it and re-writing it, the system crashed), then all index-N blobs were listed to pick the highest N value. For non-read-only repositories, this could produce race conditions with the file system. In particular, it is possible that the index.latest blob is being read in order to serve a read request (e.g. get snapshots) and while doing so, an attempt is made to delete the index.latest blob and re-write it in order to finalize a snapshot operation. On some file systems (e.g. Windows), it is forbidden to delete a file while it is open for reading by another process/thread. This commit changes the priority so that figuring out the latest index-N blob is first done by listing all index-N blobs and determining the latest N value. If that values because the repository does not support listing blobs (e.g. the URL repository), then the index.latest blob is read. This is safe because in read-only repositories that do not support listing blobs, the index.latest blob is never deleted and then re-written, so the aforementioned issue does not arise. --- .../blobstore/BlobStoreRepository.java | 34 +++++++++---------- 1 file changed, 16 insertions(+), 18 deletions(-) diff --git a/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index bcf288dab2751..bc695d66d4441 100644 --- a/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -733,24 +733,22 @@ void writeIncompatibleSnapshots(RepositoryData repositoryData) throws IOExceptio */ long latestIndexBlobId() throws IOException { try { - // first, try reading the latest index generation from the index.latest blob + // First, try listing all index-N blobs (there should only be two index-N blobs at any given + // time in a repository if cleanup is happening properly) and pick the index-N blob with the + // highest N value - this will be the latest index blob for the repository. Note, we do this + // instead of directly reading the index.latest blob to get the current index-N blob because + // index.latest is not written atomically and is not immutable - on every index-N change, + // we first delete the old index.latest and then write the new one. If the repository is not + // read-only, it is possible that we try deleting the index.latest blob while it is being read + // by some other operation (such as the get snapshots operation). In some file systems, it is + // illegal to delete a file while it is being read elsewhere (e.g. Windows). For read-only + // repositories, we read for index.latest, both because listing blob prefixes is often unsupported + // and because the index.latest blob will never be deleted and re-written. + return listBlobsToGetLatestIndexId(); + } catch (UnsupportedOperationException e) { + // If its a read-only repository, listing blobs by prefix may not be supported (e.g. a URL repository), + // in this case, try reading the latest index generation from the index.latest blob return readSnapshotIndexLatestBlob(); - } catch (IOException ioe) { - // we could not find the index.latest blob, this can happen in two scenarios: - // (1) its an empty repository - // (2) when writing the index-latest blob, if the blob already exists, - // we first delete it, then atomically write the new blob. there is - // a small window in time when the blob is deleted and the new one - // written - if the node crashes during that time, we won't have an - // index-latest blob - // lets try to list all index-N blobs to determine the last one, if listing the blobs - // is not a supported operation (which is the case for read-only repositories), then - // assume its an empty repository. - try { - return listBlobsToGetLatestIndexId(); - } catch (UnsupportedOperationException uoe) { - return RepositoryData.EMPTY_REPO_GEN; - } } } @@ -765,7 +763,7 @@ long readSnapshotIndexLatestBlob() throws IOException { private long listBlobsToGetLatestIndexId() throws IOException { Map blobs = snapshotsBlobContainer.listBlobsByPrefix(INDEX_FILE_PREFIX); - long latest = -1; + long latest = RepositoryData.EMPTY_REPO_GEN; if (blobs.isEmpty()) { // no snapshot index blobs have been written yet return latest;