Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -146,4 +146,22 @@ private CommonPathCapabilities() {
*/
public static final String ABORTABLE_STREAM =
"fs.capability.outputstream.abortable";

/**
* Does this FS support etags?
* That is: will FileStatus entries returned from listing and
* getFileStatus probes support EtagSource and return real values?
*/
public static final String ETAGS_AVAILABLE =
"fs.capability.etags.available";

/**
* Are etags guaranteed to be preserved across rename() operations?
* FileSystems MUST NOT declare support for this feature
* unless this holds.
*/
public static final String ETAGS_PRESERVED_IN_RENAME =
"fs.capability.etags.preserved.in.rename";


}
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs;

/**
* An optional interface for {@link FileStatus} subclasses to implement
* to provide access to etags.
* If etags are available, the FS SHOULD also declare the matching
* path capabilities:
* <ul>
* <li>etags supported: {@link CommonPathCapabilities#ETAGS_AVAILABLE}.</li>
* <li>etags consistent over rename:
* {@link CommonPathCapabilities#ETAGS_PRESERVED_IN_RENAME}.</li>
* </ul>
*/
public interface EtagSource {

/**
* Return an etag of this file status.
* A return value of null or "" means "no etag".
* @return a possibly null or empty etag.
*/
String getEtag();

}
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,64 @@ for both files and directories, MUST always return `true` to the `isEncrypted()`
predicate. This can be done by setting the `encrypted` flag to true when creating
the `FileStatus` instance.

#### Interface `EtagSource`

FileSystem implementations MAY support querying HTTP etags from `FileStatus`
entries. If so, the requirements are as follows.

#### Etags MUST BE different for different file contents.

Two different arrays of data written to the same path MUST have different etag
values when probed.
This is a requirement of the HTTP specification.

##### Etags MUST BE Consistent across listing operations.

The value of `EtagSource.getEtag()` MUST be the same for list* queries as
for `getFileStatus()`.

```java
((EtagSource)getFileStatus(path)).getEtag().equals(((EtagSource)listStatus(path)[0]).getEtag())
```

Similarly, the same value MUST be returned by `listFiles()` and `listStatusIncremental()`
of the path and, when listing the parent path, for every file in the listing.

##### Etags SHOULD BE preserved across rename operations

The value of `EtagSource.getEtag()` SHOULD be the same after a file is renamed.
Whether this holds is an implementation detail of the store; it does not hold for AWS S3.

#### `FileStatus` subclass MUST BE `Serializable`; MAY BE `Writable`

The base `FileStatus` class implements `Serializable` and `Writable` and marshalls
its fields appropriately.

Subclasses MUST support Java serialization (some Apache Spark applications use it),
preserving the etag. This is a matter of making the etag field non-static and
adding a `serialVersionUID`.

The `Writable` support was used for marshalling status data over Hadoop IPC calls;
that is now implemented through `org/apache/hadoop/fs/protocolPB/PBHelper.java`
and the `Writable` methods are deprecated.
Subclasses MAY override the deprecated methods to add etag marshalling, but there
is no expectation of this.

#### Appropriate etag Path Capabilities MUST BE declared

1. `hasPathCapability(path, "fs.capability.etags.available")` MUST return true iff
the filesystem returns valid (non-empty) etags.
2. `hasPathCapability(path, "fs.capability.etags.preserved.in.rename")` MUST return true
if and only if etags are preserved across renames.


#### Non-requirements of etag support

* There is no requirement/expectation that `FileSystem.getFileChecksum(Path)` returns a
checksum value related to the etag of an object, if any value is returned.
* If the same data is uploaded twice to the same or a different path,
the etag of the second upload MAY NOT match that of the first upload.


### `msync()`

Expand Down Expand Up @@ -1240,7 +1298,7 @@ Renaming a file where the destination is a directory moves the file as a child
FS' where:
not exists(FS', src)
and exists(FS', dest)
and data(FS', dest) == data (FS, dest)
and data(FS', dest) == data (FS, source)
result = True


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.hadoop.fs.contract;

import java.nio.charset.StandardCharsets;

import org.assertj.core.api.Assertions;
import org.junit.Assume;
import org.junit.Test;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.fs.EtagSource;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

import static org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_AVAILABLE;
import static org.apache.hadoop.fs.CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME;

/**
 * For filesystems which support etags, validate correctness
 * of their implementation.
 * <p>
 * Every test obtains etags through {@link #etagFromStatus(FileStatus)},
 * so a status which is not an {@link EtagSource}, or whose etag is
 * null/blank, fails the test at the point of retrieval.
 */
public abstract class AbstractContractEtagTest extends
    AbstractFSContractTestBase {

  private static final Logger LOG =
      LoggerFactory.getLogger(AbstractContractEtagTest.class);

  /**
   * Basic consistency across operations, as well as being non-empty.
   * The filesystem MUST declare the {@code ETAGS_AVAILABLE} capability,
   * and the etag from {@code getFileStatus()} MUST match that of the
   * single entry returned by {@code listStatus()} on the same path.
   * @throws Throwable on any failure
   */
  @Test
  public void testEtagConsistencyAcrossListAndHead() throws Throwable {
    describe("Etag values must be non-empty and consistent across LIST and HEAD Calls.");
    final Path path = methodPath();
    final FileSystem fs = getFileSystem();

    Assertions.assertThat(fs.hasPathCapability(path, ETAGS_AVAILABLE))
        .describedAs("path capability %s of %s",
            ETAGS_AVAILABLE, path)
        .isTrue();

    ContractTestUtils.touch(fs, path);

    // etagFromStatus() already asserts that the etag is not blank.
    final FileStatus st = fs.getFileStatus(path);
    final String etag = etagFromStatus(st);
    LOG.info("etag of empty file is \"{}\"", etag);

    final FileStatus[] statuses = fs.listStatus(path);
    Assertions.assertThat(statuses)
        .describedAs("List(%s)", path)
        .hasSize(1);
    final FileStatus lsStatus = statuses[0];
    Assertions.assertThat(etagFromStatus(lsStatus))
        .describedAs("etag of list status (%s) compared to HEAD value of %s",
            lsStatus, st)
        .isEqualTo(etag);
  }

  /**
   * Get an etag from a FileStatus which MUST BE
   * an implementation of EtagSource and
   * whose etag MUST NOT BE null/empty.
   * Both conditions are asserted here, so callers do not
   * need to repeat the checks.
   * @param st the status
   * @return the etag
   */
  String etagFromStatus(FileStatus st) {
    Assertions.assertThat(st)
        .describedAs("FileStatus %s", st)
        .isInstanceOf(EtagSource.class);
    final String etag = ((EtagSource) st).getEtag();
    Assertions.assertThat(etag)
        .describedAs("Etag of %s", st)
        .isNotBlank();
    return etag;
  }

  /**
   * Overwritten data has different etags.
   * @throws Throwable on any failure
   */
  @Test
  public void testEtagsOfDifferentDataDifferent() throws Throwable {
    describe("Verify that two different blocks of data written have different tags");

    final Path path = methodPath();
    final FileSystem fs = getFileSystem();
    final Path src = new Path(path, "src");

    ContractTestUtils.createFile(fs, src, true,
        "data1234".getBytes(StandardCharsets.UTF_8));
    final String srcTag = etagFromStatus(fs.getFileStatus(src));
    LOG.info("etag of file 1 is \"{}\"", srcTag);

    // now overwrite with data of same length
    // (ensure that path or length aren't used exclusively as tag)
    ContractTestUtils.createFile(fs, src, true,
        "1234data".getBytes(StandardCharsets.UTF_8));

    // validate
    final String tag2 = etagFromStatus(fs.getFileStatus(src));
    LOG.info("etag of file 2 is \"{}\"", tag2);

    Assertions.assertThat(tag2)
        .describedAs("etag of updated file")
        .isNotEqualTo(srcTag);
  }

  /**
   * If supported, rename preserves etags.
   * The test is skipped unless the filesystem declares the
   * {@code ETAGS_PRESERVED_IN_RENAME} capability for the test path.
   * @throws Throwable on any failure
   */
  @Test
  public void testEtagConsistencyAcrossRename() throws Throwable {
    describe("Verify that when a file is renamed, the etag remains unchanged");
    final Path path = methodPath();
    final FileSystem fs = getFileSystem();
    Assume.assumeTrue(
        "Filesystem does not declare that etags are preserved across renames",
        fs.hasPathCapability(path, ETAGS_PRESERVED_IN_RENAME));
    final Path src = new Path(path, "src");
    final Path dest = new Path(path, "dest");

    ContractTestUtils.createFile(fs, src, true,
        "sample data".getBytes(StandardCharsets.UTF_8));
    final FileStatus srcStatus = fs.getFileStatus(src);
    // etagFromStatus() already asserts that the etag is not blank.
    final String srcTag = etagFromStatus(srcStatus);
    LOG.info("etag of short file is \"{}\"", srcTag);

    // rename; the boolean outcome is checked so that a failed rename is
    // reported as such rather than as a missing destination file later on.
    Assertions.assertThat(fs.rename(src, dest))
        .describedAs("outcome of rename(%s, %s)", src, dest)
        .isTrue();

    // validate
    final FileStatus destStatus = fs.getFileStatus(dest);
    final String destTag = etagFromStatus(destStatus);
    Assertions.assertThat(destTag)
        .describedAs("etag of renamed file (%s) compared to etag of source (%s)",
            destStatus, srcStatus)
        .isEqualTo(srcTag);
  }

}
Loading