From a9ec88847f441e51ae06d4de9bbbf0feaa706a1b Mon Sep 17 00:00:00 2001 From: jamz Date: Fri, 26 Jun 2020 11:51:48 -0700 Subject: [PATCH] S3AFS - Do not attempt to resolve symlinks in globStatus S3AFS does not support symlinks, so attempting to resolve symlinks in globStatus causes wasted S3 calls and worse performance. Removing it will speed up some calls to globStatus. JIRA link: https://issues.apache.org/jira/browse/HADOOP-17105 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 10 +++-- .../fs/s3a/ITestS3AFileOperationCost.java | 44 +++++++++++++++++++ 2 files changed, 50 insertions(+), 4 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index fa0251aa73aa2..3502bdf010686 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -3977,6 +3977,8 @@ public boolean isMagicCommitPath(Path path) { /** * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. + * Override superclass so as to disable symlink resolution as symlinks + * are not supported by S3A. * {@inheritDoc} */ @Override @@ -3985,9 +3987,9 @@ public FileStatus[] globStatus(Path pathPattern) throws IOException { } /** - * Override superclass so as to disable symlink resolution and so avoid - * some calls to the FS which may have problems when the store is being - * inconsistent. + * Increments the statistic {@link Statistic#INVOCATION_GLOB_STATUS}. + * Override superclass so as to disable symlink resolution as symlinks + * are not supported by S3A. 
* {@inheritDoc} */ @Override @@ -3999,7 +4001,7 @@ public FileStatus[] globStatus( return Globber.createGlobber(this) .withPathPattern(pathPattern) .withPathFiltern(filter) - .withResolveSymlinks(true) + .withResolveSymlinks(false) .build() .glob(); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java index b2b983c4d4df7..cd8d7d5d53a3a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AFileOperationCost.java @@ -574,4 +574,48 @@ public void testCreateCost() throws Throwable { } } + + @Test + public void testCostOfGlobStatus() throws Throwable { + describe("Test globStatus has expected cost"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + + Path basePath = path("testCostOfGlobStatus/nextFolder/"); + + // create a bunch of files + int filesToCreate = 10; + for (int i = 0; i < filesToCreate; i++) { + try (FSDataOutputStream out = fs.create(basePath.suffix("/" + i))) { + verifyOperationCount(1, 1); + } + } + + fs.globStatus(basePath.suffix("/*")); + // 2 head + 1 list from getFileStatus on path, + // plus 1 list to match the glob pattern + verifyOperationCount(2, 2); + } + + @Test + public void testCostOfGlobStatusNoSymlinkResolution() throws Throwable { + describe("Test globStatus does not attempt to resolve symlinks"); + S3AFileSystem fs = getFileSystem(); + assume("Unguarded FS only", !fs.hasMetadataStore()); + + Path basePath = path("testCostOfGlobStatusNoSymlinkResolution/f/"); + + // create a single file, globStatus returning a single file on a pattern + // triggers attempts at symlink resolution if configured + String fileName = "/notASymlinkDOntResolveMeLikeOne"; + try (FSDataOutputStream out = fs.create(basePath.suffix(fileName))) 
{ + verifyOperationCount(1, 1); + } + + fs.globStatus(basePath.suffix("/*")); + // unguarded: 2 head + 1 list from getFileStatus on path, + // plus 1 list to match the glob pattern + // no additional operations from symlink resolution + verifyOperationCount(2, 2); + } }