Skip to content

Commit 324be6d

Browse files
author
Ben Roling
committed
S3GuardTool updates to correct ETag or versionId metadata
1 parent 2a2bba7 commit 324be6d

File tree

3 files changed

+48
-3
lines changed

3 files changed

+48
-3
lines changed

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -724,7 +724,7 @@ private long importDir(FileStatus status) throws IOException {
724724
long items = 0;
725725

726726
while (it.hasNext()) {
727-
LocatedFileStatus located = it.next();
727+
S3LocatedFileStatus located = it.next();
728728
S3AFileStatus child;
729729
if (located.isDirectory()) {
730730
child = DynamoDBMetadataStore.makeDirStatus(located.getPath(),
@@ -736,8 +736,8 @@ private long importDir(FileStatus status) throws IOException {
736736
located.getPath(),
737737
located.getBlockSize(),
738738
located.getOwner(),
739-
null,
740-
null);
739+
located.getETag(),
740+
located.getVersionId());
741741
}
742742
putParentsIfNotPresent(child);
743743
getStore().put(new PathMetadata(child));

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,14 @@ If this happens, reads of the affected file(s) will result in
10201020
* the file is overwritten (causing an S3Guard metadata update)
10211021
* the S3Guard metadata is pruned
10221022

1023+
The S3Guard metadata for a file can be corrected with the `s3guard import`
1024+
command as discussed above. The command can take a file URI instead of a
1025+
bucket URI to correct the metadata for a single file. For example:
1026+
1027+
```bash
1028+
hadoop s3guard import [-meta URI] s3a://my-bucket/file-with-bad-metadata
1029+
```
1030+
10231031
## Troubleshooting
10241032

10251033
### Error: `S3Guard table lacks version marker.`

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardToolLocal.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,9 @@
3737

3838
import org.apache.hadoop.fs.FSDataOutputStream;
3939
import org.apache.hadoop.fs.Path;
40+
import org.apache.hadoop.fs.s3a.S3AFileStatus;
4041
import org.apache.hadoop.fs.s3a.S3AFileSystem;
42+
import org.apache.hadoop.fs.s3a.Tristate;
4143

4244
import static org.apache.hadoop.fs.s3a.MultipartTestUtils.*;
4345
import static org.apache.hadoop.fs.s3a.S3ATestUtils.getLandsatCSVFile;
@@ -95,6 +97,41 @@ public void testImportCommand() throws Exception {
9597
// assertTrue(children.isAuthoritative());
9698
}
9799

100+
@Test
101+
public void testImportCommandRepairsETagAndVersionId() throws Exception {
102+
S3AFileSystem fs = getFileSystem();
103+
MetadataStore ms = getMetadataStore();
104+
Path path = path("test-version-metadata");
105+
try (FSDataOutputStream out = fs.create(path)) {
106+
out.write(1);
107+
}
108+
S3AFileStatus originalStatus = (S3AFileStatus) fs.getFileStatus(path);
109+
110+
// put in bogus ETag and versionId
111+
S3AFileStatus bogusStatus = S3AFileStatus.fromFileStatus(originalStatus,
112+
Tristate.FALSE, "bogusETag", "bogusVersionId");
113+
ms.put(new PathMetadata(bogusStatus));
114+
115+
// sanity check that bogus status is actually persisted
116+
S3AFileStatus retrievedBogusStatus = (S3AFileStatus) fs.getFileStatus(path);
117+
assertEquals("bogus ETag was not persisted",
118+
"bogusETag", retrievedBogusStatus.getETag());
119+
assertEquals("bogus versionId was not persisted",
120+
"bogusVersionId", retrievedBogusStatus.getVersionId());
121+
122+
// execute the import
123+
S3GuardTool.Import cmd = new S3GuardTool.Import(fs.getConf());
124+
cmd.setStore(ms);
125+
exec(cmd, "import", path.toString());
126+
127+
// make sure ETag and versionId were corrected
128+
S3AFileStatus updatedStatus = (S3AFileStatus) fs.getFileStatus(path);
129+
assertEquals("ETag was not corrected",
130+
originalStatus.getETag(), updatedStatus.getETag());
131+
assertEquals("VersionId was not corrected",
132+
originalStatus.getVersionId(), updatedStatus.getVersionId());
133+
}
134+
98135
@Test
99136
public void testDestroyBucketExistsButNoTable() throws Throwable {
100137
run(Destroy.NAME,

0 commit comments

Comments
 (0)