diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 5ae60d7a91aa5..c9a3d51f71594 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1747,7 +1747,7 @@ fs.s3a.committer.staging.conflict-mode - fail + append Staging committer conflict resolution policy. Supported: "fail", "append", "replace". diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitConstants.java index 03cfcba82a53a..877433bab2a0e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/CommitConstants.java @@ -198,7 +198,7 @@ private CommitConstants() { public static final String CONFLICT_MODE_REPLACE = "replace"; /** Default conflict mode: {@value}. 
*/ - public static final String DEFAULT_CONFLICT_MODE = CONFLICT_MODE_FAIL; + public static final String DEFAULT_CONFLICT_MODE = CONFLICT_MODE_APPEND; /** * Number of threads in committers for parallel operations on files diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java index 23bb06bd923a8..32642c9e98fad 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/DirectoryStagingCommitter.java @@ -18,6 +18,7 @@ package org.apache.hadoop.fs.s3a.commit.staging; +import java.io.FileNotFoundException; import java.io.IOException; import java.util.List; @@ -25,8 +26,11 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathExistsException; +import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants; import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit; import org.apache.hadoop.mapreduce.JobContext; import org.apache.hadoop.mapreduce.TaskAttemptContext; @@ -65,11 +69,31 @@ public void setupJob(JobContext context) throws IOException { super.setupJob(context); Path outputPath = getOutputPath(); FileSystem fs = getDestFS(); - if (getConflictResolutionMode(context, fs.getConf()) - == ConflictResolution.FAIL - && fs.exists(outputPath)) { - throw failDestinationExists(outputPath, - "Setting job as " + getRole()); + ConflictResolution conflictResolution = getConflictResolutionMode( + context, fs.getConf()); + LOG.info("Conflict Resolution mode is {}", conflictResolution); + try { + final FileStatus status = fs.getFileStatus(outputPath); + + // if it is not a 
directory, fail fast for all conflict options. + if (!status.isDirectory()) { + throw new PathExistsException(outputPath.toString(), + "output path is not a directory: " + + InternalCommitterConstants.E_DEST_EXISTS); + } + switch(conflictResolution) { + case FAIL: + throw failDestinationExists(outputPath, + "Setting job as " + getRole()); + case APPEND: + case REPLACE: + LOG.debug("Destination directory exists; conflict policy permits this"); + } + } catch (FileNotFoundException ignored) { + // there is no destination path, hence, no conflict. + // make the parent directory, which also triggers a recursive directory + // creation operation + fs.mkdirs(outputPath); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java index 518d789718a80..7ec447863c150 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/commit/staging/StagingCommitter.java @@ -842,7 +842,7 @@ public final ConflictResolution getConflictResolutionMode( Configuration fsConf) { if (conflictResolution == null) { this.conflictResolution = ConflictResolution.valueOf( - getConfictModeOption(context, fsConf)); + getConfictModeOption(context, fsConf, DEFAULT_CONFLICT_MODE)); } return conflictResolution; } @@ -889,14 +889,15 @@ protected PathExistsException failDestinationExists(final Path path, * Get the conflict mode option string. * @param context context with the config * @param fsConf filesystem config + * @param defVal default value. * @return the trimmed configuration option, upper case. 
*/ public static String getConfictModeOption(JobContext context, - Configuration fsConf) { + Configuration fsConf, String defVal) { return getConfigurationOption(context, fsConf, FS_S3A_COMMITTER_STAGING_CONFLICT_MODE, - DEFAULT_CONFLICT_MODE).toUpperCase(Locale.ENGLISH); + defVal).toUpperCase(Locale.ENGLISH); } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md index ef9c999359b0c..ac03bdcdb7a71 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/committers.md @@ -173,7 +173,7 @@ This then is the problem which the S3A committers address: *How to safely and reliably commit work to Amazon S3 or compatible object store* -## Meet the S3A Commmitters +## Meet the S3A Committers Since Hadoop 3.1, the S3A FileSystem has been accompanied by classes designed to integrate with the Hadoop and Spark job commit protocols, classes @@ -226,8 +226,8 @@ it is committed through the standard "v1" commit algorithm. When the Job is committed, the Job Manager reads the lists of pending writes from its HDFS Job destination directory and completes those uploads. -Cancelling a task is straightforward: the local directory is deleted with -its staged data. Cancelling a job is achieved by reading in the lists of +Canceling a task is straightforward: the local directory is deleted with +its staged data. Canceling a job is achieved by reading in the lists of pending writes from the HDFS job attempt directory, and aborting those uploads. For extra safety, all outstanding multipart writes to the destination directory are aborted. @@ -537,9 +537,8 @@ Conflict management is left to the execution engine itself. 
|--------|-------|-----------|-------------|---------|---------| | `mapreduce.fileoutputcommitter.marksuccessfuljobs` | X | X | X | Write a `_SUCCESS` file at the end of each job | `true` | | `fs.s3a.committer.threads` | X | X | X | Number of threads in committers for parallel operations on files. | 8 | -| `fs.s3a.committer.staging.conflict-mode` | | X | X | Conflict resolution: `fail`, `abort` or `overwrite`| `fail` | +| `fs.s3a.committer.staging.conflict-mode` | | X | X | Conflict resolution: `fail`, `append` or `replace`| `append` | | `fs.s3a.committer.staging.unique-filenames` | | X | X | Generate unique filenames | `true` | - | `fs.s3a.committer.magic.enabled` | X | | | Enable "magic committer" support in the filesystem | `false` | @@ -607,7 +606,7 @@ Conflict management is left to the execution engine itself. fs.s3a.committer.staging.conflict-mode - fail + append Staging committer conflict resolution policy. Supported: "fail", "append", "replace". diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java index 99f1a07968de1..d421118013829 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/MockS3AFileSystem.java @@ -42,6 +42,8 @@ import org.apache.hadoop.fs.s3a.s3guard.BulkOperationState; import org.apache.hadoop.util.Progressable; +import static com.google.common.base.Preconditions.checkNotNull; + /** * Relays FS calls to the mocked FS, allows for some extra logging with * stack traces to be included, stubbing out other methods @@ -240,6 +242,12 @@ public void setWorkingDirectory(Path newDir) { mock.setWorkingDirectory(newDir); } + @Override + public boolean mkdirs(Path f) throws IOException { + event("mkdirs(%s)", f); + return mock.mkdirs(f); + } + @Override public boolean mkdirs(Path f, FsPermission permission) 
throws IOException { event("mkdirs(%s)", f); @@ -249,7 +257,8 @@ public boolean mkdirs(Path f, FsPermission permission) throws IOException { @Override public FileStatus getFileStatus(Path f) throws IOException { event("getFileStatus(%s)", f); - return mock.getFileStatus(f); + return checkNotNull(mock.getFileStatus(f), + "Mock getFileStatus(%s) returned null", f); } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java index e8645b84bad72..6023daa13875b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractCommitITest.java @@ -18,10 +18,8 @@ package org.apache.hadoop.fs.s3a.commit; -import java.io.FileNotFoundException; import java.io.IOException; import java.io.InterruptedIOException; -import java.util.ArrayList; import java.util.List; import java.util.stream.Collectors; @@ -53,7 +51,6 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.listMultipartUploads; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.applyLocatedFiles; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; /** @@ -109,6 +106,13 @@ protected Path path(String filepath) throws IOException { @Override protected Configuration createConfiguration() { Configuration conf = super.createConfiguration(); + String bucketName = getTestBucketName(conf); + removeBucketOverrides(bucketName, conf, + MAGIC_COMMITTER_ENABLED, + S3A_COMMITTER_FACTORY_KEY, + FS_S3A_COMMITTER_NAME, + FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); + conf.setBoolean(MAGIC_COMMITTER_ENABLED, true); conf.setLong(MIN_MULTIPART_THRESHOLD, MULTIPART_MIN_SIZE); conf.setInt(MULTIPART_SIZE, MULTIPART_MIN_SIZE); @@ -355,25 +359,7 
@@ private String pathToPrefix(Path path) { * @throws IOException IO Failure */ protected SuccessData verifySuccessMarker(Path dir) throws IOException { - assertPathExists("Success marker", - new Path(dir, _SUCCESS)); - SuccessData successData = loadSuccessMarker(dir); - log().info("Success data {}", successData.toString()); - log().info("Metrics\n{}", - successData.dumpMetrics(" ", " = ", "\n")); - log().info("Diagnostics\n{}", - successData.dumpDiagnostics(" ", " = ", "\n")); - return successData; - } - - /** - * Load the success marker and return the data inside it. - * @param dir directory containing the marker - * @return the loaded data - * @throws IOException on any failure to load or validate the data - */ - protected SuccessData loadSuccessMarker(Path dir) throws IOException { - return SuccessData.load(getFileSystem(), new Path(dir, _SUCCESS)); + return validateSuccessFile(dir, "", getFileSystem(), "query"); } /** @@ -447,32 +433,18 @@ public static TaskAttemptContext taskAttemptForJob(JobId jobId, /** * Load in the success data marker: this guarantees that an S3A * committer was used. - * @param fs filesystem * @param outputPath path of job - * @param committerName name of committer to match + * @param committerName name of committer to match, or "" + * @param fs filesystem + * @param origin origin (e.g. "teragen" for messages) * @return the success data * @throws IOException IO failure */ - public static SuccessData validateSuccessFile(final S3AFileSystem fs, - final Path outputPath, final String committerName) throws IOException { - SuccessData successData = null; - try { - successData = loadSuccessFile(fs, outputPath); - } catch (FileNotFoundException e) { - // either the output path is missing or, if its the success file, - // somehow the relevant committer wasn't picked up. 
- String dest = outputPath.toString(); - LOG.error("No _SUCCESS file found under {}", dest); - List files = new ArrayList<>(); - applyLocatedFiles(fs.listFiles(outputPath, true), - (status) -> { - files.add(status.getPath().toString()); - LOG.error("{} {}", status.getPath(), status.getLen()); - }); - throw new AssertionError("No _SUCCESS file in " + dest - + "; found : " + files.stream().collect(Collectors.joining("\n")), - e); - } + public static SuccessData validateSuccessFile(final Path outputPath, + final String committerName, + final S3AFileSystem fs, + final String origin) throws IOException { + SuccessData successData = loadSuccessFile(fs, outputPath, origin); String commitDetails = successData.toString(); LOG.info("Committer name " + committerName + "\n{}", commitDetails); @@ -480,8 +452,10 @@ public static SuccessData validateSuccessFile(final S3AFileSystem fs, successData.dumpMetrics(" ", " = ", "\n")); LOG.info("Diagnostics\n{}", successData.dumpDiagnostics(" ", " = ", "\n")); - assertEquals("Wrong committer in " + commitDetails, - committerName, successData.getCommitter()); + if (!committerName.isEmpty()) { + assertEquals("Wrong committer in " + commitDetails, + committerName, successData.getCommitter()); + } return successData; } @@ -489,16 +463,29 @@ public static SuccessData validateSuccessFile(final S3AFileSystem fs, * Load a success file; fail if the file is empty/nonexistent. * @param fs filesystem * @param outputPath directory containing the success file. + * @param origin origin of the file * @return the loaded file. 
* @throws IOException failure to find/load the file - * @throws AssertionError file is 0-bytes long + * @throws AssertionError file is 0-bytes long. */ public static SuccessData loadSuccessFile(final S3AFileSystem fs, - final Path outputPath) throws IOException { + final Path outputPath, final String origin) throws IOException { + ContractTestUtils.assertPathExists(fs, + "Output directory " + outputPath + + " from " + origin + + " not found: Job may not have executed", + outputPath); Path success = new Path(outputPath, _SUCCESS); - ContractTestUtils.assertIsFile(fs, success); - FileStatus status = fs.getFileStatus(success); - assertTrue("0 byte success file - not a s3guard committer " + success, + FileStatus status = ContractTestUtils.verifyPathExists(fs, + "job completion marker " + success + + " from " + origin + + " not found: Job may have failed", + success); + assertTrue("_SUCCESS output from " + origin + " is not a file " + status, + status.isFile()); + assertTrue("0 byte success file " + + success + " from " + origin + + "; an S3Guard committer was not used", status.getLen() > 0); return SuccessData.load(fs, success); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java index 1fb3d89efd401..c5e0265b964bd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitMRJob.java @@ -147,8 +147,8 @@ public void testMRJob() throws Exception { } Collections.sort(actualFiles); - SuccessData successData = validateSuccessFile(fs, outputPath, - committerName()); + SuccessData successData = validateSuccessFile(outputPath, committerName(), + fs, "MR job"); List successFiles = successData.getFilenames(); String commitData = successData.toString(); assertTrue("No filenames in " + 
commitData, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java index 17ad7f6654a3b..fd585d05b2e72 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/StagingTestBase.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.s3a.commit.staging; import java.io.File; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; import java.io.Serializable; @@ -55,6 +56,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystemTestHelper; import org.apache.hadoop.fs.Path; @@ -196,9 +198,34 @@ public static void pathExists(FileSystem mockS3, Path path) when(mockS3.exists(path)).thenReturn(true); } + public static void pathIsDirectory(FileSystem mockS3, Path path) + throws IOException { + hasFileStatus(mockS3, path, + new FileStatus(0, true, 0, 0, 0, path)); + } + + public static void pathIsFile(FileSystem mockS3, Path path) + throws IOException { + pathExists(mockS3, path); + hasFileStatus(mockS3, path, + new FileStatus(0, false, 0, 0, 0, path)); + } + public static void pathDoesNotExist(FileSystem mockS3, Path path) throws IOException { when(mockS3.exists(path)).thenReturn(false); + when(mockS3.getFileStatus(path)).thenThrow( + new FileNotFoundException("mock fnfe of " + path)); + } + + public static void hasFileStatus(FileSystem mockS3, + Path path, FileStatus status) throws IOException { + when(mockS3.getFileStatus(path)).thenReturn(status); + } + + public static void mkdirsHasOutcome(FileSystem mockS3, + Path path, boolean outcome) throws IOException { + 
when(mockS3.mkdirs(path)).thenReturn(outcome); } public static void canDelete(FileSystem mockS3, String... children) @@ -221,7 +248,12 @@ public static void verifyExistenceChecked(FileSystem mockS3, String child) public static void verifyExistenceChecked(FileSystem mockS3, Path path) throws IOException { - verify(mockS3).exists(path); + verify(mockS3).getFileStatus(path); + } + + public static void verifyMkdirsInvoked(FileSystem mockS3, Path path) + throws IOException { + verify(mockS3).mkdirs(path); } /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java index 2f7e8e0520b49..505a99b0ab8ac 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingCommitter.java @@ -117,10 +117,10 @@ protected void describe(String text, Object... args) { /** * Test array for parameterized test runs: how many threads and - * how many files to use. + * whether or not filenames are unique. * @return a list of parameter tuples. 
*/ - @Parameterized.Parameters + @Parameterized.Parameters(name="threads-{0}-unique-{1}") public static Collection params() { return Arrays.asList(new Object[][] { {0, false}, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingDirectoryOutputCommitter.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingDirectoryOutputCommitter.java index b511293e55d4d..994ecef3a83ec 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingDirectoryOutputCommitter.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingDirectoryOutputCommitter.java @@ -18,9 +18,15 @@ package org.apache.hadoop.fs.s3a.commit.staging; +import java.util.Arrays; +import java.util.stream.Collectors; + import org.junit.Test; import org.mockito.Mockito; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.PathExistsException; import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants; @@ -34,6 +40,9 @@ public class TestStagingDirectoryOutputCommitter extends StagingTestBase.JobCommitterTest { + private static final Logger LOG = + LoggerFactory.getLogger(TestStagingDirectoryOutputCommitter.class); + @Override DirectoryStagingCommitter newJobCommitter() throws Exception { return new DirectoryStagingCommitter(outputPath, @@ -53,8 +62,10 @@ public void testBadConflictMode() throws Throwable { public void testDefaultConflictResolution() throws Exception { getJob().getConfiguration().unset( FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); - verifyFailureConflictOutcome(); + pathIsDirectory(getMockS3A(), outputPath); + verifyJobSetupAndCommit(); } + @Test public void testFailConflictResolution() throws Exception { getJob().getConfiguration().set( @@ -63,8 +74,7 @@ public void testFailConflictResolution() throws 
Exception { } protected void verifyFailureConflictOutcome() throws Exception { - FileSystem mockS3 = getMockS3A(); - pathExists(mockS3, outputPath); + pathIsDirectory(getMockS3A(), outputPath); final DirectoryStagingCommitter committer = newJobCommitter(); // this should fail @@ -76,32 +86,36 @@ protected void verifyFailureConflictOutcome() throws Exception { // but there are no checks in job commit (HADOOP-15469) committer.commitJob(getJob()); - reset(mockS3); - pathDoesNotExist(mockS3, outputPath); + reset((FileSystem) getMockS3A()); + pathDoesNotExist(getMockS3A(), outputPath); committer.setupJob(getJob()); - verifyExistenceChecked(mockS3, outputPath); - verifyNoMoreInteractions(mockS3); + verifyExistenceChecked(getMockS3A(), outputPath); + verifyMkdirsInvoked(getMockS3A(), outputPath); + verifyNoMoreInteractions((FileSystem) getMockS3A()); - reset(mockS3); - pathDoesNotExist(mockS3, outputPath); + reset((FileSystem) getMockS3A()); + pathDoesNotExist(getMockS3A(), outputPath); committer.commitJob(getJob()); - verifyCompletion(mockS3); + verifyCompletion(getMockS3A()); } @Test public void testAppendConflictResolution() throws Exception { - FileSystem mockS3 = getMockS3A(); - - pathExists(mockS3, outputPath); getJob().getConfiguration().set( FS_S3A_COMMITTER_STAGING_CONFLICT_MODE, CONFLICT_MODE_APPEND); + FileSystem mockS3 = getMockS3A(); + pathIsDirectory(mockS3, outputPath); + verifyJobSetupAndCommit(); + } + protected void verifyJobSetupAndCommit() + throws Exception { final DirectoryStagingCommitter committer = newJobCommitter(); committer.setupJob(getJob()); - verifyNoMoreInteractions(mockS3); + FileSystem mockS3 = getMockS3A(); Mockito.reset(mockS3); pathExists(mockS3, outputPath); @@ -114,7 +128,7 @@ public void testAppendConflictResolution() throws Exception { public void testReplaceConflictResolution() throws Exception { FileSystem mockS3 = getMockS3A(); - pathExists(mockS3, outputPath); + pathIsDirectory(mockS3, outputPath); 
getJob().getConfiguration().set( FS_S3A_COMMITTER_STAGING_CONFLICT_MODE, CONFLICT_MODE_REPLACE); @@ -122,7 +136,6 @@ public void testReplaceConflictResolution() throws Exception { final DirectoryStagingCommitter committer = newJobCommitter(); committer.setupJob(getJob()); - verifyNoMoreInteractions(mockS3); Mockito.reset(mockS3); pathExists(mockS3, outputPath); @@ -133,4 +146,49 @@ public void testReplaceConflictResolution() throws Exception { verifyCompletion(mockS3); } + @Test + public void testReplaceConflictFailsIfDestIsFile() throws Exception { + pathIsFile(getMockS3A(), outputPath); + + getJob().getConfiguration().set( + FS_S3A_COMMITTER_STAGING_CONFLICT_MODE, CONFLICT_MODE_REPLACE); + + intercept(PathExistsException.class, + InternalCommitterConstants.E_DEST_EXISTS, + "Expected a PathExistsException as the destination" + + " was a file", + () -> { + newJobCommitter().setupJob(getJob()); + }); + } + + @Test + public void testAppendConflictFailsIfDestIsFile() throws Exception { + pathIsFile(getMockS3A(), outputPath); + + getJob().getConfiguration().set( + FS_S3A_COMMITTER_STAGING_CONFLICT_MODE, CONFLICT_MODE_APPEND); + + intercept(PathExistsException.class, + InternalCommitterConstants.E_DEST_EXISTS, + "Expected a PathExistsException as the destination" + + " is a file", + () -> { + newJobCommitter().setupJob(getJob()); + }); + } + + @Test + public void testValidateDefaultConflictMode() throws Throwable { + Configuration baseConf = new Configuration(true); + String[] sources = baseConf.getPropertySources( + FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); + String sourceStr = Arrays.stream(sources) + .collect(Collectors.joining(",")); + LOG.info("source of conflict mode {}", sourceStr); + String baseConfVal = baseConf + .getTrimmed(FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); + assertEquals("conflict mode in core config from " + sourceStr, + CONFLICT_MODE_APPEND, baseConfVal); + } } diff --git 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java index 2409b681e16bd..8116b79eb7fe7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/TestStagingPartitionedTaskCommit.java @@ -33,7 +33,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathExistsException; -import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants; import org.apache.hadoop.mapreduce.JobContext; import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; @@ -80,48 +79,13 @@ public void testBadConflictMode() throws Throwable { @Test public void testDefault() throws Exception { - FileSystem mockS3 = getMockS3A(); - JobContext job = getJob(); job.getConfiguration().unset( FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); final PartitionedStagingCommitter committer = newTaskCommitter(); committer.setupTask(getTAC()); - assertConflictResolution(committer, job, ConflictResolution.FAIL); - createTestOutputFiles(relativeFiles, - committer.getTaskAttemptPath(getTAC()), getTAC().getConfiguration()); - - // test failure when one partition already exists - reset(mockS3); - Path exists = new Path(outputPath, relativeFiles.get(0)).getParent(); - pathExists(mockS3, exists); - - intercept(PathExistsException.class, - InternalCommitterConstants.E_DEST_EXISTS, - "Expected a PathExistsException as a partition" - + " already exists:" + exists, - () -> { - committer.commitTask(getTAC()); - mockS3.getFileStatus(exists); - }); - - // test success - reset(mockS3); - - committer.commitTask(getTAC()); - Set files = Sets.newHashSet(); - for (InitiateMultipartUploadRequest request : - getMockResults().getRequests().values()) 
{ - assertEquals(BUCKET, request.getBucketName()); - files.add(request.getKey()); - } - assertEquals("Should have the right number of uploads", - relativeFiles.size(), files.size()); - - Set expected = buildExpectedList(committer); - - assertEquals("Should have correct paths", expected, files); + assertConflictResolution(committer, job, ConflictResolution.APPEND); } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestDirectoryCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestDirectoryCommitProtocol.java index ea28ecbe58cf6..36857669553f6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestDirectoryCommitProtocol.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/staging/integration/ITestDirectoryCommitProtocol.java @@ -19,7 +19,12 @@ package org.apache.hadoop.fs.s3a.commit.staging.integration; import java.io.IOException; +import java.util.Arrays; +import java.util.stream.Collectors; +import org.junit.Test; + +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.commit.AbstractS3ACommitter; import org.apache.hadoop.fs.s3a.commit.CommitConstants; @@ -30,6 +35,9 @@ import org.apache.hadoop.mapreduce.JobStatus; import org.apache.hadoop.mapreduce.TaskAttemptContext; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.CONFLICT_MODE_APPEND; +import static org.apache.hadoop.fs.s3a.commit.CommitConstants.FS_S3A_COMMITTER_STAGING_CONFLICT_MODE; + /** ITest of the low level protocol methods. 
*/ public class ITestDirectoryCommitProtocol extends ITestStagingCommitProtocol { @@ -57,6 +65,32 @@ public AbstractS3ACommitter createFailingCommitter( return new CommitterWithFailedThenSucceed(getOutDir(), tContext); } + /** + * This is here because somehow test runs were failing with + * the conflict mode being fail. Unsetting per-bucket options + * in setup made this go away; it's retained for regression + * testing. + */ + @Test + public void testValidateDefaultConflictMode() throws Throwable { + describe("Checking default conflict mode adoption"); + Configuration baseConf = new Configuration(true); + String[] sources = baseConf.getPropertySources( + FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); + String sourceStr = Arrays.stream(sources) + .collect(Collectors.joining(",")); + String baseConfVal = baseConf + .getTrimmed(FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); + assertEquals("conflict mode in core config from " + sourceStr, + CONFLICT_MODE_APPEND, baseConfVal); + + Configuration fsConf = getFileSystem().getConf(); + String conflictModeDefVal = fsConf + .getTrimmed(FS_S3A_COMMITTER_STAGING_CONFLICT_MODE); + assertEquals("conflict mode in filesystem", + CONFLICT_MODE_APPEND, conflictModeDefVal); + } + /** * The class provides a overridden implementation of commitJobInternal which * causes the commit failed for the first time then succeed. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/AbstractCommitTerasortIT.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/AbstractCommitTerasortIT.java index 7db3068f49a4c..479b3c80f7c55 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/AbstractCommitTerasortIT.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/terasort/AbstractCommitTerasortIT.java @@ -18,6 +18,8 @@ package org.apache.hadoop.fs.s3a.commit.terasort; +import java.io.File; +import java.nio.charset.Charset; import java.util.Optional; import java.util.function.BiConsumer; @@ -27,13 +29,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.commons.io.FileUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.examples.terasort.TeraGen; import org.apache.hadoop.examples.terasort.TeraSort; import org.apache.hadoop.examples.terasort.TeraSortConfigKeys; import org.apache.hadoop.examples.terasort.TeraValidate; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.commit.AbstractYarnClusterITest; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.DurationInfo; @@ -108,6 +110,9 @@ private void prepareToTerasort() { yarnConfig.setBoolean( TeraSortConfigKeys.USE_SIMPLE_PARTITIONER.key(), true); + yarnConfig.setBoolean( + TeraSortConfigKeys.USE_SIMPLE_PARTITIONER.key(), + false); terasortPath = new Path("/terasort-" + getClass().getSimpleName()) .makeQualified(getFileSystem()); sortInput = new Path(terasortPath, "sortin"); @@ -143,7 +148,7 @@ private Optional executeStage( assertEquals(stage + "(" + StringUtils.join(", ", args) + ")" + " failed", 0, result); - validateSuccessFile(getFileSystem(), dest, committerName()); + validateSuccessFile(dest, committerName(), getFileSystem(), stage); return Optional.of(d); } @@ -161,6 
+166,7 @@ public void test_100_terasort_setup() throws Throwable { @Test public void test_110_teragen() throws Throwable { describe("Teragen to %s", sortInput); + getFileSystem().delete(sortInput, true); JobConf jobConf = newJobConf(); patchConfigurationForCommitter(jobConf); @@ -174,7 +180,9 @@ public void test_110_teragen() throws Throwable { @Test public void test_120_terasort() throws Throwable { describe("Terasort from %s to %s", sortInput, sortOutput); - loadSuccessFile(getFileSystem(), sortInput); + getFileSystem().delete(sortOutput, true); + + loadSuccessFile(getFileSystem(), sortInput, "previous teragen stage"); JobConf jobConf = newJobConf(); patchConfigurationForCommitter(jobConf); // this job adds some data, so skip it. @@ -189,7 +197,8 @@ public void test_120_terasort() throws Throwable { @Test public void test_130_teravalidate() throws Throwable { describe("TeraValidate from %s to %s", sortOutput, sortValidate); - loadSuccessFile(getFileSystem(), sortOutput); + getFileSystem().delete(sortValidate, true); + loadSuccessFile(getFileSystem(), sortOutput, "previous terasort stage"); JobConf jobConf = newJobConf(); patchConfigurationForCommitter(jobConf); teravalidateStageDuration = executeStage("TeraValidate", @@ -224,9 +233,9 @@ public void test_140_teracomplete() throws Throwable { stage.accept("Validate", teravalidateStageDuration); stage.accept("Completed", terasortDuration); String text = results.toString(); - Path path = new Path(terasortPath, "results.csv"); - LOG.info("Results are in {}\n{}", path, text); - ContractTestUtils.writeTextFile(getFileSystem(), path, text, true); + File resultsFile = File.createTempFile("results", ".csv"); + FileUtils.write(resultsFile, text, Charset.forName("UTF-8")); + LOG.info("Results are in {}\n{}", resultsFile, text); } /**