Skip to content

Commit 0b5af5f

Browse files
committed
addendum - doc, test, source changes
1 parent 811ddca commit 0b5af5f

File tree

9 files changed

+186
-68
lines changed

9 files changed

+186
-68
lines changed

hadoop-common-project/hadoop-common/src/main/resources/core-default.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1718,14 +1718,14 @@
17181718
<name>fs.s3a.encryption.algorithm</name>
17191719
<description>Specify a server-side encryption or client-side
17201720
encryption algorithm for s3a: file system. Unset by default. It supports the
1721-
following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'SSE-C', and 'CSE-KMS'
1721+
following values: 'AES256' (for SSE-S3), 'SSE-KMS', 'DSSE-KMS', 'SSE-C', and 'CSE-KMS'
17221722
</description>
17231723
</property>
17241724

17251725
<property>
17261726
<name>fs.s3a.encryption.key</name>
17271727
<description>Specific encryption key to use if fs.s3a.encryption.algorithm
1728-
has been set to 'SSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C
1728+
has been set to 'SSE-KMS', 'DSSE-KMS', 'SSE-C' or 'CSE-KMS'. In the case of SSE-C
17291729
, the value of this property should be the Base64 encoded key. If you are
17301730
using SSE-KMS and leave this property empty, you'll be using your default
17311731
S3 KMS key, otherwise you should set this property to the specific KMS key

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AEncryptionMethods.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ public enum S3AEncryptionMethods {
4040
* Error string when {@link #getMethod(String)} fails.
4141
* Used in tests.
4242
*/
43-
static final String UNKNOWN_ALGORITHM
43+
public static final String UNKNOWN_ALGORITHM
4444
= "Unknown encryption algorithm ";
4545

4646
/**

hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java

Lines changed: 67 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets;
6161

6262
import static org.apache.commons.lang3.StringUtils.isNotEmpty;
63+
import static org.apache.hadoop.fs.s3a.S3AEncryptionMethods.UNKNOWN_ALGORITHM;
6364
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.DEFAULT_UPLOAD_PART_COUNT_LIMIT;
6465
import static org.apache.hadoop.util.Preconditions.checkArgument;
6566
import static org.apache.hadoop.util.Preconditions.checkNotNull;
@@ -272,28 +273,38 @@ protected void copyEncryptionParameters(HeadObjectResponse srcom,
272273
return;
273274
}
274275

275-
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
276+
switch (algorithm) {
277+
case SSE_S3:
276278
copyObjectRequestBuilder.serverSideEncryption(algorithm.getMethod());
277-
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
279+
break;
280+
case SSE_KMS:
278281
copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
279282
// Set the KMS key if present, else S3 uses AWS managed key.
280283
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
281-
.ifPresent(kmsKey -> copyObjectRequestBuilder.ssekmsKeyId(kmsKey));
282-
} else if (S3AEncryptionMethods.DSSE_KMS == algorithm) {
284+
.ifPresent(copyObjectRequestBuilder::ssekmsKeyId);
285+
break;
286+
case DSSE_KMS:
283287
copyObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE);
284288
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
285289
.ifPresent(copyObjectRequestBuilder::ssekmsKeyId);
286-
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
290+
break;
291+
case SSE_C:
287292
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
288-
.ifPresent(base64customerKey -> {
289-
copyObjectRequestBuilder.copySourceSSECustomerAlgorithm(
290-
ServerSideEncryption.AES256.name()).copySourceSSECustomerKey(base64customerKey)
291-
.copySourceSSECustomerKeyMD5(
292-
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)))
293-
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
294-
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
295-
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
296-
});
293+
.ifPresent(base64customerKey -> copyObjectRequestBuilder
294+
.copySourceSSECustomerAlgorithm(ServerSideEncryption.AES256.name())
295+
.copySourceSSECustomerKey(base64customerKey)
296+
.copySourceSSECustomerKeyMD5(
297+
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)))
298+
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
299+
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
300+
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))));
301+
break;
302+
case CSE_KMS:
303+
case CSE_CUSTOM:
304+
case NONE:
305+
break;
306+
default:
307+
LOG.warn(UNKNOWN_ALGORITHM + ": " + algorithm);
297308
}
298309
}
299310
/**
@@ -351,24 +362,35 @@ private void putEncryptionParameters(PutObjectRequest.Builder putObjectRequestBu
351362
final S3AEncryptionMethods algorithm
352363
= getServerSideEncryptionAlgorithm();
353364

354-
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
365+
switch (algorithm) {
366+
case SSE_S3:
355367
putObjectRequestBuilder.serverSideEncryption(algorithm.getMethod());
356-
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
368+
break;
369+
case SSE_KMS:
357370
putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
358371
// Set the KMS key if present, else S3 uses AWS managed key.
359372
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
360-
.ifPresent(kmsKey -> putObjectRequestBuilder.ssekmsKeyId(kmsKey));
361-
} else if (S3AEncryptionMethods.DSSE_KMS == algorithm) {
373+
.ifPresent(putObjectRequestBuilder::ssekmsKeyId);
374+
break;
375+
case DSSE_KMS:
362376
putObjectRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE);
363377
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
364378
.ifPresent(putObjectRequestBuilder::ssekmsKeyId);
365-
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
379+
break;
380+
case SSE_C:
366381
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
367-
.ifPresent(base64customerKey -> {
368-
putObjectRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
369-
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
370-
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
371-
});
382+
.ifPresent(base64customerKey -> putObjectRequestBuilder
383+
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
384+
.sseCustomerKey(base64customerKey)
385+
.sseCustomerKeyMD5(Md5Utils.md5AsBase64(
386+
Base64.getDecoder().decode(base64customerKey))));
387+
break;
388+
case CSE_KMS:
389+
case CSE_CUSTOM:
390+
case NONE:
391+
break;
392+
default:
393+
LOG.warn(UNKNOWN_ALGORITHM + ": " + algorithm);
372394
}
373395
}
374396

@@ -416,24 +438,35 @@ private void multipartUploadEncryptionParameters(
416438
CreateMultipartUploadRequest.Builder mpuRequestBuilder) {
417439
final S3AEncryptionMethods algorithm = getServerSideEncryptionAlgorithm();
418440

419-
if (S3AEncryptionMethods.SSE_S3 == algorithm) {
441+
switch (algorithm) {
442+
case SSE_S3:
420443
mpuRequestBuilder.serverSideEncryption(algorithm.getMethod());
421-
} else if (S3AEncryptionMethods.SSE_KMS == algorithm) {
444+
break;
445+
case SSE_KMS:
422446
mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS);
423447
// Set the KMS key if present, else S3 uses AWS managed key.
424448
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
425-
.ifPresent(kmsKey -> mpuRequestBuilder.ssekmsKeyId(kmsKey));
426-
} else if (S3AEncryptionMethods.DSSE_KMS == algorithm) {
449+
.ifPresent(mpuRequestBuilder::ssekmsKeyId);
450+
break;
451+
case DSSE_KMS:
427452
mpuRequestBuilder.serverSideEncryption(ServerSideEncryption.AWS_KMS_DSSE);
428453
EncryptionSecretOperations.getSSEAwsKMSKey(encryptionSecrets)
429454
.ifPresent(mpuRequestBuilder::ssekmsKeyId);
430-
} else if (S3AEncryptionMethods.SSE_C == algorithm) {
455+
break;
456+
case SSE_C:
431457
EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets)
432-
.ifPresent(base64customerKey -> {
433-
mpuRequestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
434-
.sseCustomerKey(base64customerKey).sseCustomerKeyMD5(
435-
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey)));
436-
});
458+
.ifPresent(base64customerKey -> mpuRequestBuilder
459+
.sseCustomerAlgorithm(ServerSideEncryption.AES256.name())
460+
.sseCustomerKey(base64customerKey)
461+
.sseCustomerKeyMD5(
462+
Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))));
463+
break;
464+
case CSE_KMS:
465+
case CSE_CUSTOM:
466+
case NONE:
467+
break;
468+
default:
469+
LOG.warn(UNKNOWN_ALGORITHM + ": " + algorithm);
437470
}
438471
}
439472

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/encryption.md

Lines changed: 80 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ The server-side "SSE" encryption is performed with symmetric AES256 encryption;
6666
S3 offers different mechanisms for actually defining the key to use.
6767

6868

69-
There are four key management mechanisms, which in order of simplicity of use,
69+
There are five key management mechanisms, which in order of simplicity of use,
7070
are:
7171

7272
* S3 Default Encryption
@@ -75,6 +75,9 @@ are:
7575
by Amazon's Key Management Service, a key referenced by name in the uploading client.
7676
* SSE-C : the client specifies an actual base64 encoded AES-256 key to be used
7777
to encrypt and decrypt the data.
78+
* DSSE-KMS: Two independent layers of encryption at server side. An AES256 key is
79+
generated in S3, and encrypted with a secret key provided by Amazon's Key Management
80+
Service.
7881

7982
Encryption options
8083

@@ -84,14 +87,15 @@ Encryption options
8487
| `SSE-KMS` | server side, KMS key | key used to encrypt/decrypt | none |
8588
| `SSE-C` | server side, custom key | encryption algorithm and secret | encryption algorithm and secret |
8689
| `CSE-KMS` | client side, KMS key | encryption algorithm and key ID | encryption algorithm |
90+
| `DSSE-KMS` | server side, KMS key | key used to encrypt/decrypt | none |
8791

8892
With server-side encryption, the data is uploaded to S3 unencrypted (but wrapped by the HTTPS
8993
encryption channel).
9094
The data is encrypted in the S3 store and decrypted when it's being retrieved.
9195

9296
A server side algorithm can be enabled by default for a bucket, so that
9397
whenever data is uploaded unencrypted a default encryption algorithm is added.
94-
When data is encrypted with S3-SSE or SSE-KMS it is transparent to all clients
98+
When data is encrypted with S3-SSE, SSE-KMS or DSSE-KMS it is transparent to all clients
9599
downloading the data.
96100
SSE-C is different in that every client must know the secret key needed to decrypt the data.
97101

@@ -132,7 +136,7 @@ not explicitly declare an encryption algorithm.
132136

133137
[S3 Default Encryption for S3 Buckets](https://docs.aws.amazon.com/AmazonS3/latest/dev/bucket-encryption.html)
134138

135-
This supports SSE-S3 and SSE-KMS.
139+
This supports SSE-S3, SSE-KMS and DSSE-KMS.
136140

137141
There is no need to set anything up in the client: do it in the AWS console.
138142

@@ -316,6 +320,79 @@ metadata. Since only one encryption key can be provided at a time, S3A will not
316320
pass the correct encryption key to decrypt the data.
317321

318322

323+
### <a name="dsse-kms"></a> DSSE-KMS: Dual-layer Server-Encryption with KMS Managed Encryption Keys
324+
325+
By providing a dual-layer server-side encryption mechanism using AWS Key Management Service
326+
(AWS KMS) keys, known as DSSE-KMS, two layers of encryption are applied to objects upon their
327+
upload to Amazon S3. DSSE-KMS simplifies the process of meeting compliance requirements that
328+
mandate the implementation of multiple layers of encryption for data while maintaining complete
329+
control over the encryption keys.
330+
331+
332+
When uploading data encrypted with DSSE-KMS, the sequence is as follows:
333+
334+
1. The S3A client must declare a specific CMK in the property `fs.s3a.encryption.key`, or leave
335+
it blank to use the default configured for that region.
336+
337+
2. The S3A client uploads all the data as normal, now including encryption information.
338+
339+
3. The S3 service encrypts the data with a symmetric key unique to the new object.
340+
341+
4. The S3 service retrieves the chosen CMK key from the KMS service, and, if the user has
342+
the right to use it, uses it to provide dual-layer encryption for the data.
343+
344+
345+
When downloading DSSE-KMS encrypted data, the sequence is as follows:
346+
347+
1. The S3A client issues an HTTP GET request to read the data.
348+
349+
2. S3 sees that the data was encrypted with DSSE-KMS, and looks up the specific key in the
350+
KMS service.
351+
352+
3. If and only if the requesting user has been granted permission to use the CMK does
353+
the KMS service provide S3 with the key.
354+
355+
4. As a result, S3 will only decode the data if the user has been granted access to the key.
356+
357+
358+
### Enabling DSSE-KMS
359+
360+
To enable DSSE-KMS, the property `fs.s3a.encryption.algorithm` must be set to `DSSE-KMS` in `core-site`:
361+
362+
```xml
363+
<property>
364+
<name>fs.s3a.encryption.algorithm</name>
365+
<value>DSSE-KMS</value>
366+
</property>
367+
```
368+
369+
The ID of the specific key used to encrypt the data should also be set in the property `fs.s3a.encryption.key`:
370+
371+
```xml
372+
<property>
373+
<name>fs.s3a.encryption.key</name>
374+
<value>arn:aws:kms:us-west-2:360379543683:key/071a86ff-8881-4ba0-9230-95af6d01ca01</value>
375+
</property>
376+
```
377+
378+
Organizations may define a default key in the Amazon KMS; if a default key is set,
379+
then it will be used whenever DSSE-KMS encryption is chosen and the value of `fs.s3a.encryption.key` is empty.
380+
381+
### The S3A `fs.s3a.encryption.key` key only affects created files
382+
383+
With DSSE-KMS, the S3A client option `fs.s3a.encryption.key` sets the
384+
key to be used when new files are created. When reading files, this key,
385+
and indeed the value of `fs.s3a.encryption.algorithm` is ignored:
386+
S3 will attempt to retrieve the key and decrypt the file based on the create-time settings.
387+
388+
This means that
389+
390+
* There's no need to configure any client simply reading data.
391+
* It is possible for a client to read data encrypted with one KMS key, and
392+
write it with another.
393+
394+
395+
319396
## <a name="best_practises"></a> Encryption best practises
320397

321398

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -447,7 +447,7 @@ and rate of requests. Spreading data across different buckets, and/or using
447447
a more balanced directory structure may be beneficial.
448448
Consult [the AWS documentation](http://docs.aws.amazon.com/AmazonS3/latest/dev/request-rate-perf-considerations.html).
449449

450-
Reading or writing data encrypted with SSE-KMS forces S3 to make calls of
450+
Reading or writing data encrypted with SSE-KMS or DSSE-KMS forces S3 to make calls of
451451
the AWS KMS Key Management Service, which comes with its own
452452
[Request Rate Limits](http://docs.aws.amazon.com/kms/latest/developerguide/limits.html).
453453
These default to 1200/second for an account, across all keys and all uses of

hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1033,7 +1033,7 @@ The specific tests an Assumed Role ARN is required for are
10331033
To run these tests you need:
10341034

10351035
1. A role in your AWS account with full read and write access rights to
1036-
the S3 bucket used in the tests, and KMS for any SSE-KMS tests.
1036+
the S3 bucket used in the tests, and KMS for any SSE-KMS or DSSE-KMS tests.
10371037

10381038

10391039
1. Your IAM User to have the permissions to "assume" that role.

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java

Lines changed: 24 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,7 @@
2828
import org.apache.hadoop.fs.Path;
2929

3030
import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_KEY;
31-
import static org.junit.Assert.assertEquals;
32-
import static org.junit.Assert.assertNull;
31+
import static org.assertj.core.api.Assertions.assertThat;
3332

3433
public final class EncryptionTestUtils {
3534

@@ -79,32 +78,36 @@ public static void assertEncrypted(S3AFileSystem fs,
7978
md.ssekmsKeyId());
8079
switch(algorithm) {
8180
case SSE_C:
82-
assertNull("Metadata algorithm should have been null in "
83-
+ details,
84-
md.serverSideEncryptionAsString());
85-
assertEquals("Wrong SSE-C algorithm in "
86-
+ details,
87-
SSE_C_ALGORITHM, md.sseCustomerAlgorithm());
81+
assertThat(md.serverSideEncryptionAsString())
82+
.describedAs("Details of the server-side encryption algorithm used: %s", details)
83+
.isNull();
84+
assertThat(md.sseCustomerAlgorithm())
85+
.describedAs("Details of SSE-C algorithm: %s", details)
86+
.isEqualTo(SSE_C_ALGORITHM);
8887
String md5Key = convertKeyToMd5(fs);
89-
assertEquals("getSSECustomerKeyMd5() wrong in " + details,
90-
md5Key, md.sseCustomerKeyMD5());
88+
assertThat(md.sseCustomerKeyMD5())
89+
.describedAs("Details of the customer provided encryption key: %s", details)
90+
.isEqualTo(md5Key);
9191
break;
9292
case SSE_KMS:
93-
assertEquals("Wrong algorithm in " + details,
94-
AWS_KMS_SSE_ALGORITHM, md.serverSideEncryptionAsString());
95-
assertEquals("Wrong KMS key in " + details,
96-
kmsKeyArn,
97-
md.ssekmsKeyId());
93+
assertThat(md.serverSideEncryptionAsString())
94+
.describedAs("Details of the server-side encryption algorithm used: %s", details)
95+
.isEqualTo(AWS_KMS_SSE_ALGORITHM);
96+
assertThat(md.ssekmsKeyId())
97+
.describedAs("Details of the KMS key: %s", details)
98+
.isEqualTo(kmsKeyArn);
9899
break;
99100
case DSSE_KMS:
100-
assertEquals("Wrong algorithm in " + details,
101-
AWS_KMS_DSSE_ALGORITHM, md.serverSideEncryptionAsString());
102-
assertEquals("Wrong KMS key in " + details,
103-
kmsKeyArn,
104-
md.ssekmsKeyId());
101+
assertThat(md.serverSideEncryptionAsString())
102+
.describedAs("Details of the server-side encryption algorithm used: %s", details)
103+
.isEqualTo(AWS_KMS_DSSE_ALGORITHM);
104+
assertThat(md.ssekmsKeyId())
105+
.describedAs("Details of the KMS key: %s", details)
106+
.isEqualTo(kmsKeyArn);
105107
break;
106108
default:
107-
assertEquals("AES256", md.serverSideEncryptionAsString());
109+
assertThat(md.serverSideEncryptionAsString())
110+
.isEqualTo("AES256");
108111
}
109112
}
110113

hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ADSSEEncryptionWithDefaultS3Settings.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -123,7 +123,7 @@ public void testEncryptionOverRename() throws Throwable {
123123
*/
124124
private void skipIfBucketNotKmsEncrypted() throws IOException {
125125
S3AFileSystem fs = getFileSystem();
126-
Path path = path(getMethodName() + "find-encryption-algo");
126+
Path path = methodPath();
127127
ContractTestUtils.touch(fs, path);
128128
try {
129129
String sseAlgorithm =

0 commit comments

Comments
 (0)