Skip to content

Commit 26514b6

Browse files
committed
HADOOP-17628. Distcp contract test is really slow with ABFS and S3A; timing out. (#3240)
This patch cuts down the size of directory trees used for distcp contract tests against object stores, making them much faster against distant/slow stores. On abfs, the test only runs with -Dscale (as was the case for s3a already), and has the larger scale test timeout. After every test case, the FileSystem IOStatistics are logged, to provide information about what IO is taking place and what its performance is. There are some test cases which upload files of 1+ MiB; you can increase the size of the upload in the option "scale.test.distcp.file.size.kb". Set it to zero and the large file tests are skipped. Contributed by Steve Loughran.
1 parent 904cdd0 commit 26514b6

File tree

6 files changed

+149
-104
lines changed

6 files changed

+149
-104
lines changed

hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/test/GenericTestUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -948,8 +948,8 @@ public static List<Path> createFiles(final FileSystem fs,
948948
final int fileCount,
949949
final int dirCount) throws IOException {
950950
return createDirsAndFiles(fs, destDir, depth, fileCount, dirCount,
951-
new ArrayList<Path>(fileCount),
952-
new ArrayList<Path>(dirCount));
951+
new ArrayList<>(fileCount),
952+
new ArrayList<>(dirCount));
953953
}
954954

955955
/**

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractDistCp.java

Lines changed: 10 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -18,17 +18,12 @@
1818

1919
package org.apache.hadoop.fs.contract.s3a;
2020

21-
import java.io.FileNotFoundException;
22-
import java.io.IOException;
23-
2421
import static org.apache.hadoop.fs.s3a.Constants.*;
2522
import static org.apache.hadoop.fs.s3a.S3ATestConstants.SCALE_TEST_TIMEOUT_MILLIS;
2623
import static org.apache.hadoop.fs.s3a.S3ATestUtils.maybeEnableS3Guard;
2724

2825
import org.apache.hadoop.conf.Configuration;
29-
import org.apache.hadoop.fs.Path;
3026
import org.apache.hadoop.fs.StorageStatistics;
31-
import org.apache.hadoop.fs.s3a.FailureInjectionPolicy;
3227
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
3328

3429
/**
@@ -60,41 +55,28 @@ protected Configuration createConfiguration() {
6055
}
6156

6257
@Override
63-
protected S3AContract createContract(Configuration conf) {
64-
return new S3AContract(conf);
58+
protected boolean shouldUseDirectWrite() {
59+
return true;
6560
}
6661

67-
/**
68-
* Always inject the delay path in, so if the destination is inconsistent,
69-
* and uses this key, inconsistency triggered.
70-
* @param filepath path string in
71-
* @return path on the remote FS for distcp
72-
* @throws IOException IO failure
73-
*/
7462
@Override
75-
protected Path path(final String filepath) throws IOException {
76-
Path path = super.path(filepath);
77-
return new Path(path, FailureInjectionPolicy.DEFAULT_DELAY_KEY_SUBSTRING);
63+
protected S3AContract createContract(Configuration conf) {
64+
return new S3AContract(conf);
7865
}
7966

8067
@Override
81-
public void testDirectWrite() throws Exception {
68+
public void testDistCpWithIterator() throws Exception {
8269
final long renames = getRenameOperationCount();
83-
super.testDirectWrite();
84-
assertEquals("Expected no renames for a direct write distcp", 0L,
85-
getRenameOperationCount() - renames);
70+
super.testDistCpWithIterator();
71+
assertEquals("Expected no renames for a direct write distcp",
72+
getRenameOperationCount(),
73+
renames);
8674
}
8775

8876
@Override
8977
public void testNonDirectWrite() throws Exception {
9078
final long renames = getRenameOperationCount();
91-
try {
92-
super.testNonDirectWrite();
93-
} catch (FileNotFoundException e) {
94-
// We may get this exception when data is written to a DELAY_LISTING_ME
95-
// directory causing verification of the distcp success to fail if
96-
// S3Guard is not enabled
97-
}
79+
super.testNonDirectWrite();
9880
assertEquals("Expected 2 renames for a non-direct write distcp", 2L,
9981
getRenameOperationCount() - renames);
10082
}

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractDistCp.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,16 +19,24 @@
1919
package org.apache.hadoop.fs.azurebfs.contract;
2020

2121
import org.apache.hadoop.conf.Configuration;
22+
import org.apache.hadoop.fs.azure.integration.AzureTestConstants;
2223
import org.apache.hadoop.fs.azurebfs.services.AuthType;
2324
import org.apache.hadoop.tools.contract.AbstractContractDistCpTest;
2425
import org.junit.Assume;
2526

27+
import static org.apache.hadoop.fs.azure.integration.AzureTestUtils.assumeScaleTestsEnabled;
28+
2629
/**
2730
* Contract test for distCp operation.
2831
*/
2932
public class ITestAbfsFileSystemContractDistCp extends AbstractContractDistCpTest {
3033
private final ABFSContractTestBinding binding;
3134

35+
@Override
36+
protected int getTestTimeoutMillis() {
37+
return AzureTestConstants.SCALE_TEST_TIMEOUT_MILLIS;
38+
}
39+
3240
public ITestAbfsFileSystemContractDistCp() throws Exception {
3341
binding = new ABFSContractTestBinding();
3442
Assume.assumeTrue(binding.getAuthType() != AuthType.OAuth);
@@ -38,6 +46,7 @@ public ITestAbfsFileSystemContractDistCp() throws Exception {
3846
public void setup() throws Exception {
3947
binding.setup();
4048
super.setup();
49+
assumeScaleTestsEnabled(binding.getRawConfiguration());
4150
}
4251

4352
@Override

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/contract/ITestAbfsFileSystemContractSecureDistCp.java

Lines changed: 0 additions & 49 deletions
This file was deleted.

0 commit comments

Comments
 (0)