From 9e0b1be3f182935b9b01056e2af42944821a7a0e Mon Sep 17 00:00:00 2001
From: slfan1989
Date: Thu, 11 Jan 2024 10:18:57 +0800
Subject: [PATCH 001/164] Preparing for 3.4.1 development

---
 hadoop-assemblies/pom.xml | 4 ++--
 hadoop-build-tools/pom.xml | 2 +-
 hadoop-client-modules/hadoop-client-api/pom.xml | 4 ++--
 hadoop-client-modules/hadoop-client-check-invariants/pom.xml | 4 ++--
 .../hadoop-client-check-test-invariants/pom.xml | 4 ++--
 hadoop-client-modules/hadoop-client-integration-tests/pom.xml | 4 ++--
 hadoop-client-modules/hadoop-client-minicluster/pom.xml | 4 ++--
 hadoop-client-modules/hadoop-client-runtime/pom.xml | 4 ++--
 hadoop-client-modules/hadoop-client/pom.xml | 4 ++--
 hadoop-client-modules/pom.xml | 2 +-
 hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml | 4 ++--
 hadoop-cloud-storage-project/hadoop-cos/pom.xml | 2 +-
 hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml | 4 ++--
 hadoop-cloud-storage-project/pom.xml | 4 ++--
 hadoop-common-project/hadoop-annotations/pom.xml | 4 ++--
 hadoop-common-project/hadoop-auth-examples/pom.xml | 4 ++--
 hadoop-common-project/hadoop-auth/pom.xml | 4 ++--
 hadoop-common-project/hadoop-common/pom.xml | 4 ++--
 hadoop-common-project/hadoop-kms/pom.xml | 4 ++--
 hadoop-common-project/hadoop-minikdc/pom.xml | 4 ++--
 hadoop-common-project/hadoop-nfs/pom.xml | 4 ++--
 hadoop-common-project/hadoop-registry/pom.xml | 4 ++--
 hadoop-common-project/pom.xml | 4 ++--
 hadoop-dist/pom.xml | 4 ++--
 hadoop-hdfs-project/hadoop-hdfs-client/pom.xml | 4 ++--
 hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml | 4 ++--
 hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml | 4 ++--
 hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml | 4 ++--
 hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml | 4 ++--
 hadoop-hdfs-project/hadoop-hdfs/pom.xml | 4 ++--
 hadoop-hdfs-project/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-app/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-common/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-core/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-hs-plugins/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-hs/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-jobclient/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-nativetask/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-shuffle/pom.xml | 4 ++--
 .../hadoop-mapreduce-client-uploader/pom.xml | 4 ++--
 hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml | 4 ++--
 hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml | 4 ++--
 hadoop-mapreduce-project/pom.xml | 4 ++--
 hadoop-maven-plugins/pom.xml | 2 +-
 hadoop-minicluster/pom.xml | 4 ++--
 hadoop-project-dist/pom.xml | 4 ++--
 hadoop-project/pom.xml | 4 ++--
 hadoop-tools/hadoop-aliyun/pom.xml | 2 +-
 hadoop-tools/hadoop-archive-logs/pom.xml | 4 ++--
 hadoop-tools/hadoop-archives/pom.xml | 4 ++--
 hadoop-tools/hadoop-aws/pom.xml | 4 ++--
 .../hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java | 2 +-
 hadoop-tools/hadoop-azure-datalake/pom.xml | 2 +-
 hadoop-tools/hadoop-azure/pom.xml | 2 +-
 .../apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java | 2 +-
 hadoop-tools/hadoop-benchmark/pom.xml | 4 ++--
 hadoop-tools/hadoop-datajoin/pom.xml | 4 ++--
 hadoop-tools/hadoop-distcp/pom.xml | 4 ++--
 .../hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml | 4 ++--
 .../hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml | 4 ++--
 .../hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml | 4 ++--
 .../hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml | 4 ++--
 hadoop-tools/hadoop-dynamometer/pom.xml | 4 ++--
 hadoop-tools/hadoop-extras/pom.xml | 4 ++--
 hadoop-tools/hadoop-federation-balance/pom.xml | 4 ++--
 hadoop-tools/hadoop-fs2img/pom.xml | 4 ++--
 hadoop-tools/hadoop-gridmix/pom.xml | 4 ++--
 hadoop-tools/hadoop-kafka/pom.xml | 4 ++--
 hadoop-tools/hadoop-openstack/pom.xml | 4 ++--
 hadoop-tools/hadoop-pipes/pom.xml | 4 ++--
 hadoop-tools/hadoop-resourceestimator/pom.xml | 2 +-
 hadoop-tools/hadoop-rumen/pom.xml | 4 ++--
 hadoop-tools/hadoop-sls/pom.xml | 4 ++--
 hadoop-tools/hadoop-streaming/pom.xml | 4 ++--
 hadoop-tools/hadoop-tools-dist/pom.xml | 4 ++--
 hadoop-tools/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml | 4 ++--
 .../hadoop-yarn-applications-catalog-docker/pom.xml | 2 +-
 .../hadoop-yarn-applications-catalog-webapp/pom.xml | 2 +-
 .../hadoop-yarn-applications-catalog/pom.xml | 2 +-
 .../hadoop-yarn-applications-distributedshell/pom.xml | 4 ++--
 .../hadoop-yarn-applications-mawo-core/pom.xml | 2 +-
 .../hadoop-yarn-applications-mawo/pom.xml | 2 +-
 .../hadoop-yarn-applications-unmanaged-am-launcher/pom.xml | 4 ++--
 .../hadoop-yarn-services/hadoop-yarn-services-api/pom.xml | 2 +-
 .../hadoop-yarn-services/hadoop-yarn-services-core/pom.xml | 2 +-
 .../hadoop-yarn-applications/hadoop-yarn-services/pom.xml | 2 +-
 .../hadoop-yarn/hadoop-yarn-applications/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-csi/pom.xml | 2 +-
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml | 4 ++--
 .../hadoop-yarn-server-applicationhistoryservice/pom.xml | 4 ++--
 .../hadoop-yarn-server/hadoop-yarn-server-common/pom.xml | 4 ++--
 .../hadoop-yarn-server-globalpolicygenerator/pom.xml | 4 ++--
 .../hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml | 4 ++--
 .../hadoop-yarn-server-resourcemanager/pom.xml | 4 ++--
 .../hadoop-yarn-server/hadoop-yarn-server-router/pom.xml | 4 ++--
 .../hadoop-yarn-server-sharedcachemanager/pom.xml | 4 ++--
 .../hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml | 4 ++--
 .../hadoop-yarn-server-timeline-pluginstorage/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice-documentstore/pom.xml | 2 +-
 .../hadoop-yarn-server-timelineservice-hbase-tests/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice-hbase-client/pom.xml | 2 +-
 .../hadoop-yarn-server-timelineservice-hbase-common/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice-hbase-server-1/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice-hbase-server-2/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice-hbase-server/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice-hbase/pom.xml | 4 ++--
 .../hadoop-yarn-server-timelineservice/pom.xml | 4 ++--
 .../hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/pom.xml | 4 ++--
 hadoop-yarn-project/hadoop-yarn/pom.xml | 4 ++--
 hadoop-yarn-project/pom.xml | 4 ++--
 pom.xml | 4 ++--
 117 files changed, 213 insertions(+), 213 deletions(-)

diff --git a/hadoop-assemblies/pom.xml b/hadoop-assemblies/pom.xml
index 7b709fe29086d..f0101339896c6 100644
--- a/hadoop-assemblies/pom.xml
+++ b/hadoop-assemblies/pom.xml
@@ -23,11 +23,11 @@
   org.apache.hadoop
   hadoop-project
-  3.4.0-SNAPSHOT
+  3.4.1-SNAPSHOT
   ../hadoop-project
 hadoop-assemblies
- 3.4.0-SNAPSHOT
+ 3.4.1-SNAPSHOT
 Apache Hadoop Assemblies
 Apache Hadoop Assemblies
diff --git a/hadoop-build-tools/pom.xml
b/hadoop-build-tools/pom.xml index 584d1fee281ba..19f2002a0928c 100644 --- a/hadoop-build-tools/pom.xml +++ b/hadoop-build-tools/pom.xml @@ -18,7 +18,7 @@ hadoop-main org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-build-tools diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml index b4b81011eb517..d5dda5cfa530f 100644 --- a/hadoop-client-modules/hadoop-client-api/pom.xml +++ b/hadoop-client-modules/hadoop-client-api/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-client-api - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Client diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml index eee5ecadec2bd..4ffe8e68c232e 100644 --- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-client-check-invariants - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT pom diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml index bdf82d38ab568..63b48e317c734 100644 --- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml +++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-client-check-test-invariants - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT pom diff --git a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml index ba593ebd1b42d..0fe107fcde8eb 100644 --- a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml +++ b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-client-integration-tests - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Checks that we can use the generated artifacts Apache Hadoop Client Packaging Integration Tests diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml index 9c9df2216fe8e..0ce68c09469a2 100644 --- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml +++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-client-minicluster - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Minicluster for Clients diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml index 1391da71ffd3c..5dac58f3f21bf 100644 --- a/hadoop-client-modules/hadoop-client-runtime/pom.xml +++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-client-runtime - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Client diff --git a/hadoop-client-modules/hadoop-client/pom.xml b/hadoop-client-modules/hadoop-client/pom.xml index 08452aa20ef02..d549b55c58c16 100644 --- a/hadoop-client-modules/hadoop-client/pom.xml +++ b/hadoop-client-modules/hadoop-client/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project-dist 
- 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-client - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Client aggregation pom with dependencies exposed Apache Hadoop Client Aggregator diff --git a/hadoop-client-modules/pom.xml b/hadoop-client-modules/pom.xml index fb4aedb0aeb43..a5503990555f1 100644 --- a/hadoop-client-modules/pom.xml +++ b/hadoop-client-modules/pom.xml @@ -18,7 +18,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-client-modules diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml index 6c8a0916802f2..ddbfb599e3c88 100644 --- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-cloud-storage - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Cloud Storage diff --git a/hadoop-cloud-storage-project/hadoop-cos/pom.xml b/hadoop-cloud-storage-project/hadoop-cos/pom.xml index ca7c4bf516cad..9c80989c52379 100644 --- a/hadoop-cloud-storage-project/hadoop-cos/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-cos/pom.xml @@ -20,7 +20,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-cos diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml index 4892a7ac8629f..92f29bfc6a405 100755 --- a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml +++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml @@ -15,11 +15,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-huaweicloud - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop OBS support This module contains code to support integration with OBS. 
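Every pom.xml hunk in this patch applies the same substitution: the module's own version, and where present the inherited hadoop-project or hadoop-project-dist parent version, moves from 3.4.0-SNAPSHOT to 3.4.1-SNAPSHOT. As a sketch of what one such hunk looks like with the Maven markup intact (the module name, hunk offsets, and relativePath below are representative placeholders, not copied from any specific file):

diff --git a/some-module/pom.xml b/some-module/pom.xml
--- a/some-module/pom.xml
+++ b/some-module/pom.xml
@@ -20,11 +20,11 @@
   <parent>
     <groupId>org.apache.hadoop</groupId>
     <artifactId>hadoop-project</artifactId>
-    <version>3.4.0-SNAPSHOT</version>
+    <version>3.4.1-SNAPSHOT</version>
     <relativePath>../../hadoop-project</relativePath>
   </parent>
   <artifactId>some-module</artifactId>
-  <version>3.4.0-SNAPSHOT</version>
+  <version>3.4.1-SNAPSHOT</version>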
diff --git a/hadoop-cloud-storage-project/pom.xml b/hadoop-cloud-storage-project/pom.xml index 8df6bb41e9080..bf6ee95547809 100644 --- a/hadoop-cloud-storage-project/pom.xml +++ b/hadoop-cloud-storage-project/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-cloud-storage-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Cloud Storage Project Apache Hadoop Cloud Storage Project pom diff --git a/hadoop-common-project/hadoop-annotations/pom.xml b/hadoop-common-project/hadoop-annotations/pom.xml index a262d55b0426c..d01acf1d98cdb 100644 --- a/hadoop-common-project/hadoop-annotations/pom.xml +++ b/hadoop-common-project/hadoop-annotations/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-annotations - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Annotations Apache Hadoop Annotations jar diff --git a/hadoop-common-project/hadoop-auth-examples/pom.xml b/hadoop-common-project/hadoop-auth-examples/pom.xml index 4deda432797e0..ae70ec5a24ec7 100644 --- a/hadoop-common-project/hadoop-auth-examples/pom.xml +++ b/hadoop-common-project/hadoop-auth-examples/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-auth-examples - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT war Apache Hadoop Auth Examples diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml index 433a615c606d3..14c955c7d256d 100644 --- a/hadoop-common-project/hadoop-auth/pom.xml +++ b/hadoop-common-project/hadoop-auth/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-auth - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Auth diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 938d0c4506022..8f35d3a442f7e 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-common - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Common Apache Hadoop Common jar diff --git a/hadoop-common-project/hadoop-kms/pom.xml b/hadoop-common-project/hadoop-kms/pom.xml index 96588a22b9419..3dc1962ba8746 100644 --- a/hadoop-common-project/hadoop-kms/pom.xml +++ b/hadoop-common-project/hadoop-kms/pom.xml @@ -22,11 +22,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-kms - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop KMS diff --git a/hadoop-common-project/hadoop-minikdc/pom.xml b/hadoop-common-project/hadoop-minikdc/pom.xml index c292aebbe3656..bf8f84ba324a7 100644 --- a/hadoop-common-project/hadoop-minikdc/pom.xml +++ b/hadoop-common-project/hadoop-minikdc/pom.xml @@ -18,12 +18,12 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project 4.0.0 hadoop-minikdc - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MiniKDC Apache Hadoop MiniKDC jar diff --git a/hadoop-common-project/hadoop-nfs/pom.xml b/hadoop-common-project/hadoop-nfs/pom.xml index 1da5a25ad1e2e..689ed1063656b 100644 --- a/hadoop-common-project/hadoop-nfs/pom.xml +++ b/hadoop-common-project/hadoop-nfs/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-nfs - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop 
NFS diff --git a/hadoop-common-project/hadoop-registry/pom.xml b/hadoop-common-project/hadoop-registry/pom.xml index 725dda50f216b..05c34553df8ab 100644 --- a/hadoop-common-project/hadoop-registry/pom.xml +++ b/hadoop-common-project/hadoop-registry/pom.xml @@ -19,12 +19,12 @@ hadoop-project org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project 4.0.0 hadoop-registry - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Registry diff --git a/hadoop-common-project/pom.xml b/hadoop-common-project/pom.xml index f167a079a9b0c..4308aeb0fc18c 100644 --- a/hadoop-common-project/pom.xml +++ b/hadoop-common-project/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-common-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Common Project Apache Hadoop Common Project pom diff --git a/hadoop-dist/pom.xml b/hadoop-dist/pom.xml index e617fa765f98d..dc336358c649c 100644 --- a/hadoop-dist/pom.xml +++ b/hadoop-dist/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Distribution Apache Hadoop Distribution jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml index 9e370788a6b61..6eb24902c24ca 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-client/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-hdfs-client - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop HDFS Client Apache Hadoop HDFS Client jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml index b5b264ffa8b54..ab989491cc65d 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-httpfs/pom.xml @@ -22,11 +22,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-hdfs-httpfs - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop HttpFS diff --git a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml index 3f25354e293b9..9b8ac8186cfa9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-native-client/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-hdfs-native-client - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop HDFS Native Client Apache Hadoop HDFS Native Client jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml index c234caf46e677..5b27ce57ef4c6 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-nfs/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-hdfs-nfs - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop HDFS-NFS Apache Hadoop HDFS-NFS jar diff --git a/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml b/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml index e3bb52365fe82..cbcfe26680842 100644 --- a/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs-rbf/pom.xml @@ -20,11 +20,11 @@ 
https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-hdfs-rbf - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop HDFS-RBF Apache Hadoop HDFS-RBF jar diff --git a/hadoop-hdfs-project/hadoop-hdfs/pom.xml b/hadoop-hdfs-project/hadoop-hdfs/pom.xml index 3abff73e76f0e..cbbedb8306928 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/pom.xml +++ b/hadoop-hdfs-project/hadoop-hdfs/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-hdfs - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop HDFS Apache Hadoop HDFS jar diff --git a/hadoop-hdfs-project/pom.xml b/hadoop-hdfs-project/pom.xml index 5992df05c20aa..ac6a19aebb5b1 100644 --- a/hadoop-hdfs-project/pom.xml +++ b/hadoop-hdfs-project/pom.xml @@ -20,11 +20,11 @@ https://maven.apache.org/xsd/maven-4.0.0.xsd"> org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-hdfs-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop HDFS Project Apache Hadoop HDFS Project pom diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml index e3b3511c0ce17..c1e915513fccc 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-app/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-app - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce App diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml index 38e7d2756d49e..6449eae980d46 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-common/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-common - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Common diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml index 2f90a9051874d..77512f8c578ac 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-core/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-core - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Core diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml index 37d4464cd76d3..9c75ad33aaf97 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs-plugins/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 
hadoop-mapreduce-client-hs-plugins - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce HistoryServer Plugins diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml index 21b93d87761ae..9cd9723d1ad04 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-hs - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce HistoryServer diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml index 17358a37da32d..774e8037c828b 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-jobclient/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-jobclient - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce JobClient diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml index 3ce8141c988de..7332c11aabe75 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-nativetask/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-nativetask - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce NativeTask diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml index 7117b4d97702f..559f05c7db3b6 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-shuffle/pom.xml @@ -19,11 +19,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-shuffle - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Shuffle diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/pom.xml index 24e6e1ec68f42..39b131a5a87e2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/hadoop-mapreduce-client-uploader/pom.xml @@ -18,11 +18,11 @@ hadoop-mapreduce-client org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-mapreduce-client-uploader - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Uploader diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml index eb770c4ff1987..708532271ca7c 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-client/pom.xml 
@@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-mapreduce-client - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Client pom diff --git a/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml b/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml index fac2ac0561eff..2443dd17ae2c2 100644 --- a/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml +++ b/hadoop-mapreduce-project/hadoop-mapreduce-examples/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-mapreduce-examples - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Examples Apache Hadoop MapReduce Examples jar diff --git a/hadoop-mapreduce-project/pom.xml b/hadoop-mapreduce-project/pom.xml index 21554090d7855..993c905f5c811 100644 --- a/hadoop-mapreduce-project/pom.xml +++ b/hadoop-mapreduce-project/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-mapreduce - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT pom Apache Hadoop MapReduce https://hadoop.apache.org/ diff --git a/hadoop-maven-plugins/pom.xml b/hadoop-maven-plugins/pom.xml index 8765eb795b874..cb76c2f53b495 100644 --- a/hadoop-maven-plugins/pom.xml +++ b/hadoop-maven-plugins/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-maven-plugins diff --git a/hadoop-minicluster/pom.xml b/hadoop-minicluster/pom.xml index c0334b3fcc178..832af3e628414 100644 --- a/hadoop-minicluster/pom.xml +++ b/hadoop-minicluster/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-minicluster - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Mini-Cluster diff --git a/hadoop-project-dist/pom.xml b/hadoop-project-dist/pom.xml index 53ec05b30bb09..dbf918caa28a1 100644 --- a/hadoop-project-dist/pom.xml +++ b/hadoop-project-dist/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Project Dist POM Apache Hadoop Project Dist POM pom diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 9fdcc0256be48..65f2c0b0e5d5b 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -20,10 +20,10 @@ org.apache.hadoop hadoop-main - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Project POM Apache Hadoop Project POM pom diff --git a/hadoop-tools/hadoop-aliyun/pom.xml b/hadoop-tools/hadoop-aliyun/pom.xml index 7605b18b5381f..2c43236589d37 100644 --- a/hadoop-tools/hadoop-aliyun/pom.xml +++ b/hadoop-tools/hadoop-aliyun/pom.xml @@ -18,7 +18,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-aliyun diff --git a/hadoop-tools/hadoop-archive-logs/pom.xml b/hadoop-tools/hadoop-archive-logs/pom.xml index bd64495dcae63..73663f2f4aae2 100644 --- a/hadoop-tools/hadoop-archive-logs/pom.xml +++ b/hadoop-tools/hadoop-archive-logs/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-archive-logs - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Archive Logs Apache Hadoop Archive Logs jar diff --git a/hadoop-tools/hadoop-archives/pom.xml b/hadoop-tools/hadoop-archives/pom.xml index b16b88d11dada..bf64818538dec 100644 --- a/hadoop-tools/hadoop-archives/pom.xml +++ 
b/hadoop-tools/hadoop-archives/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-archives - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Archives Apache Hadoop Archives jar diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index c5f921a874c1f..efe38a3bc9382 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-aws - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Amazon Web Services support This module contains code to support integration with Amazon Web Services. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java index 7f8dd043261b2..9ab8dcd5d5803 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/TestHttpReferrerAuditHeader.java @@ -204,7 +204,7 @@ public void testHeaderFiltering() throws Throwable { + "&id=e8ede3c7-8506-4a43-8268-fe8fcbb510a4-00000278&t0=154" + "&fs=e8ede3c7-8506-4a43-8268-fe8fcbb510a4&t1=156&" + "ts=1620905165700\"" - + " \"Hadoop 3.4.0-SNAPSHOT, java/1.8.0_282 vendor/AdoptOpenJDK\"" + + " \"Hadoop 3.4.1-SNAPSHOT, java/1.8.0_282 vendor/AdoptOpenJDK\"" + " -" + " TrIqtEYGWAwvu0h1N9WJKyoqM0TyHUaY+ZZBwP2yNf2qQp1Z/0=" + " SigV4" diff --git a/hadoop-tools/hadoop-azure-datalake/pom.xml b/hadoop-tools/hadoop-azure-datalake/pom.xml index 14ffa3798aa03..3b2a35accfcd2 100644 --- a/hadoop-tools/hadoop-azure-datalake/pom.xml +++ b/hadoop-tools/hadoop-azure-datalake/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-azure-datalake diff --git a/hadoop-tools/hadoop-azure/pom.xml b/hadoop-tools/hadoop-azure/pom.xml index e8c5fb78efd8d..5f03043185079 100644 --- a/hadoop-tools/hadoop-azure/pom.xml +++ b/hadoop-tools/hadoop-azure/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-azure diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index 4f87e02000249..d19c4470b2996 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -402,7 +402,7 @@ public static AbfsClient getMockAbfsClient(AbfsClient baseAbfsClientInstance, } // override user agent - String userAgent = "APN/1.0 Azure Blob FS/3.4.0-SNAPSHOT (PrivateBuild " + String userAgent = "APN/1.0 Azure Blob FS/3.4.1-SNAPSHOT (PrivateBuild " + "JavaJRE 1.8.0_252; Linux 5.3.0-59-generic/amd64; openssl-1.0; " + "UNKNOWN/UNKNOWN) MSFT"; client = ITestAbfsClient.setAbfsClientField(client, "userAgent", userAgent); diff --git a/hadoop-tools/hadoop-benchmark/pom.xml b/hadoop-tools/hadoop-benchmark/pom.xml index 20d928ef01ea7..5c6c7fad6ae74 100644 --- a/hadoop-tools/hadoop-benchmark/pom.xml +++ b/hadoop-tools/hadoop-benchmark/pom.xml @@ -22,11 +22,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 
../../hadoop-project/pom.xml hadoop-benchmark - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT jar Apache Hadoop Common Benchmark diff --git a/hadoop-tools/hadoop-datajoin/pom.xml b/hadoop-tools/hadoop-datajoin/pom.xml index 890e60a318513..8a4cc9e94a5b9 100644 --- a/hadoop-tools/hadoop-datajoin/pom.xml +++ b/hadoop-tools/hadoop-datajoin/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-datajoin - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Data Join Apache Hadoop Data Join jar diff --git a/hadoop-tools/hadoop-distcp/pom.xml b/hadoop-tools/hadoop-distcp/pom.xml index cbdce3d76f576..c6b7c1ef4993a 100644 --- a/hadoop-tools/hadoop-distcp/pom.xml +++ b/hadoop-tools/hadoop-distcp/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-distcp - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Distributed Copy Apache Hadoop Distributed Copy jar diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml index 2b8c4294066f3..8b643c0268570 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-blockgen/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../../hadoop-project hadoop-dynamometer-blockgen - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Dynamometer Block Listing Generator Apache Hadoop Dynamometer Block Listing Generator jar diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml index aa296b31520d5..a8040be9a7cdb 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-dist/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../../hadoop-project-dist hadoop-dynamometer-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Dynamometer Dist Apache Hadoop Dynamometer Dist jar diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml index b31f26163265f..c3421b05f0361 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-infra/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../../hadoop-project hadoop-dynamometer-infra - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Dynamometer Cluster Simulator Apache Hadoop Dynamometer Cluster Simulator jar diff --git a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml index c06de341b0329..852457e0e7763 100644 --- a/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml +++ b/hadoop-tools/hadoop-dynamometer/hadoop-dynamometer-workload/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../../hadoop-project hadoop-dynamometer-workload - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Dynamometer Workload Simulator Apache Hadoop Dynamometer Workload Simulator jar diff --git a/hadoop-tools/hadoop-dynamometer/pom.xml b/hadoop-tools/hadoop-dynamometer/pom.xml index 30e28b6bf6bfb..577d2412d056c 100644 --- a/hadoop-tools/hadoop-dynamometer/pom.xml +++ 
b/hadoop-tools/hadoop-dynamometer/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-dynamometer - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Dynamometer Apache Hadoop Dynamometer pom diff --git a/hadoop-tools/hadoop-extras/pom.xml b/hadoop-tools/hadoop-extras/pom.xml index d6e0ba07dc4f8..00b5e89dfdcba 100644 --- a/hadoop-tools/hadoop-extras/pom.xml +++ b/hadoop-tools/hadoop-extras/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-extras - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Extras Apache Hadoop Extras jar diff --git a/hadoop-tools/hadoop-federation-balance/pom.xml b/hadoop-tools/hadoop-federation-balance/pom.xml index 036193da7a46b..26147ea272c29 100644 --- a/hadoop-tools/hadoop-federation-balance/pom.xml +++ b/hadoop-tools/hadoop-federation-balance/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-federation-balance - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Federation Balance Apache Hadoop Federation Balance jar diff --git a/hadoop-tools/hadoop-fs2img/pom.xml b/hadoop-tools/hadoop-fs2img/pom.xml index 8c5ed0224bf14..3117363812336 100644 --- a/hadoop-tools/hadoop-fs2img/pom.xml +++ b/hadoop-tools/hadoop-fs2img/pom.xml @@ -17,12 +17,12 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project org.apache.hadoop hadoop-fs2img - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Image Generation Tool Apache Hadoop Image Generation Tool jar diff --git a/hadoop-tools/hadoop-gridmix/pom.xml b/hadoop-tools/hadoop-gridmix/pom.xml index 21d786c342d6c..475e1e4e6c43f 100644 --- a/hadoop-tools/hadoop-gridmix/pom.xml +++ b/hadoop-tools/hadoop-gridmix/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-gridmix - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Gridmix Apache Hadoop Gridmix jar diff --git a/hadoop-tools/hadoop-kafka/pom.xml b/hadoop-tools/hadoop-kafka/pom.xml index d8c01eda23bee..cf2475450bfa5 100644 --- a/hadoop-tools/hadoop-kafka/pom.xml +++ b/hadoop-tools/hadoop-kafka/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-kafka - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Kafka Library support This module contains code to support integration with Kafka. diff --git a/hadoop-tools/hadoop-openstack/pom.xml b/hadoop-tools/hadoop-openstack/pom.xml index a3f0e748454a1..b174feefa61ed 100644 --- a/hadoop-tools/hadoop-openstack/pom.xml +++ b/hadoop-tools/hadoop-openstack/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-openstack - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop OpenStack support This module used to contain code to support integration with OpenStack. 
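A version bump across 117 poms is normally scripted rather than edited by hand. One way to produce an equivalent tree-wide change (an assumed workflow, not necessarily how this commit was generated) is the Maven versions plugin run from the repository root:

mvn versions:set -DnewVersion=3.4.1-SNAPSHOT -DgenerateBackupPoms=false

versions:set only rewrites <version> elements in the poms; hard-coded version strings in test sources such as TestHttpReferrerAuditHeader.java and ITestAbfsClient.java, which this patch also touches, still have to be updated separately.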
diff --git a/hadoop-tools/hadoop-pipes/pom.xml b/hadoop-tools/hadoop-pipes/pom.xml index 2d991575afa26..2ddcfbfa5432f 100644 --- a/hadoop-tools/hadoop-pipes/pom.xml +++ b/hadoop-tools/hadoop-pipes/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-pipes - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Pipes Apache Hadoop Pipes pom diff --git a/hadoop-tools/hadoop-resourceestimator/pom.xml b/hadoop-tools/hadoop-resourceestimator/pom.xml index a6f6c691b36f6..ec891d8713c4b 100644 --- a/hadoop-tools/hadoop-resourceestimator/pom.xml +++ b/hadoop-tools/hadoop-resourceestimator/pom.xml @@ -25,7 +25,7 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-resourceestimator diff --git a/hadoop-tools/hadoop-rumen/pom.xml b/hadoop-tools/hadoop-rumen/pom.xml index 9a966d55c0ed1..4344fea47175d 100644 --- a/hadoop-tools/hadoop-rumen/pom.xml +++ b/hadoop-tools/hadoop-rumen/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-rumen - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Rumen Apache Hadoop Rumen jar diff --git a/hadoop-tools/hadoop-sls/pom.xml b/hadoop-tools/hadoop-sls/pom.xml index 208cbdf6c142a..2ff6851d5cf48 100644 --- a/hadoop-tools/hadoop-sls/pom.xml +++ b/hadoop-tools/hadoop-sls/pom.xml @@ -19,11 +19,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-sls - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Scheduler Load Simulator Apache Hadoop Scheduler Load Simulator jar diff --git a/hadoop-tools/hadoop-streaming/pom.xml b/hadoop-tools/hadoop-streaming/pom.xml index 33e6ca8ff7491..cd3d183545b38 100644 --- a/hadoop-tools/hadoop-streaming/pom.xml +++ b/hadoop-tools/hadoop-streaming/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project hadoop-streaming - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop MapReduce Streaming Apache Hadoop MapReduce Streaming jar diff --git a/hadoop-tools/hadoop-tools-dist/pom.xml b/hadoop-tools/hadoop-tools-dist/pom.xml index 8a3e93c1037d3..b785d00db6d30 100644 --- a/hadoop-tools/hadoop-tools-dist/pom.xml +++ b/hadoop-tools/hadoop-tools-dist/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../../hadoop-project-dist hadoop-tools-dist - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Tools Dist Apache Hadoop Tools Dist jar diff --git a/hadoop-tools/pom.xml b/hadoop-tools/pom.xml index 4e934cd101f85..1bdc0e3d4860f 100644 --- a/hadoop-tools/pom.xml +++ b/hadoop-tools/pom.xml @@ -20,11 +20,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-tools - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Tools Apache Hadoop Tools pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml index e4b8ee2822788..9bc1f1737a5a4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-api/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-api - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN API diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-docker/pom.xml 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-docker/pom.xml index 3899f5bb96afe..41ffcd705bd45 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-docker/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-docker/pom.xml @@ -23,7 +23,7 @@ hadoop-yarn-applications-catalog org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Application Catalog Docker Image diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/pom.xml index 1a2c37faad4cd..6b007458068d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/hadoop-yarn-applications-catalog-webapp/pom.xml @@ -23,7 +23,7 @@ hadoop-yarn-applications-catalog org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Application Catalog Webapp diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/pom.xml index 5269f67f1a372..eadf34799567d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-catalog/pom.xml @@ -19,7 +19,7 @@ hadoop-yarn-applications org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT org.apache.hadoop diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml index d1cd362c7d65d..b40b8a78a7b69 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-distributedshell/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-applications org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-applications-distributedshell - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN DistributedShell diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/hadoop-yarn-applications-mawo-core/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/hadoop-yarn-applications-mawo-core/pom.xml index 770fceaaa3678..d4442c8247258 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/hadoop-yarn-applications-mawo-core/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/hadoop-yarn-applications-mawo-core/pom.xml @@ -15,7 +15,7 @@ hadoop-yarn-applications-mawo org.apache.hadoop.applications.mawo - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/pom.xml 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/pom.xml index ce8e14c1ccfb3..4a91575e7343d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-mawo/pom.xml @@ -15,7 +15,7 @@ hadoop-yarn-applications org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml index 67be3758a5b55..745bb6dce0b29 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-applications-unmanaged-am-launcher/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-applications org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-applications-unmanaged-am-launcher - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Unmanaged Am Launcher diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml index dbe0c69d5508b..5c211e5f22dbe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-api/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-yarn-services - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT hadoop-yarn-services-api Apache Hadoop YARN Services API diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/pom.xml index 1ff770cc42023..c1a581bfcaab4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/hadoop-yarn-services-core/pom.xml @@ -19,7 +19,7 @@ org.apache.hadoop hadoop-yarn-services - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT hadoop-yarn-services-core jar diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/pom.xml index 33fcaa62260eb..0336859a88f72 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/hadoop-yarn-services/pom.xml @@ -19,7 +19,7 @@ hadoop-yarn-applications org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-services diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml index aad4ab1f9a651..c8f49f446f9e8 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-applications/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-applications - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Applications pom diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml index 5eedf486121a3..fdc7da7114cbb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-client/pom.xml @@ -17,10 +17,10 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT hadoop-yarn-client - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Client diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml index 4f6b40891d38e..3d6d3d4161afe 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-common - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Common diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-csi/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-csi/pom.xml index 38bc9085abf2c..2488028629dc4 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-csi/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-csi/pom.xml @@ -18,7 +18,7 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-csi diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml index 39081e5cd3101..20aeb85243b2c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-registry/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-registry - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Registry diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml index eb68251aa607a..d2fff06b035c2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-applicationhistoryservice/pom.xml @@ -22,11 +22,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-applicationhistoryservice - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN ApplicationHistoryService diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml index c5142c116c2aa..9b440de9365b1 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-common/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-common - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Server Common diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-globalpolicygenerator/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-globalpolicygenerator/pom.xml index b56c60e81763d..2371a5289d227 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-globalpolicygenerator/pom.xml +++ 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-globalpolicygenerator/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 org.apache.hadoop hadoop-yarn-server-globalpolicygenerator - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN GlobalPolicyGenerator diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml index 3b5c373f50c33..c78cd42a62d3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-nodemanager - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN NodeManager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml index fcd68ab2f52a6..507a493e7ad7f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-resourcemanager - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN ResourceManager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/pom.xml index b171876471772..6dded4a9abe06 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-router/pom.xml @@ -19,12 +19,12 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 org.apache.hadoop hadoop-yarn-server-router - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Router diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml index 971fb0941a3ba..7f6e099d30a3b 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-sharedcachemanager/pom.xml @@ -17,10 +17,10 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT hadoop-yarn-server-sharedcachemanager - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN SharedCacheManager diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml index 07838688d7099..0d76a7f30cc0c 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-tests/pom.xml @@ -19,10 +19,10 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT hadoop-yarn-server-tests - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Server Tests diff --git 
a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml index 1b80d25830796..d985876128880 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timeline-pluginstorage/pom.xml @@ -22,11 +22,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timeline-pluginstorage - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Timeline Plugin Storage diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/pom.xml index 56089a42ea87b..5c269d7840c4a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-documentstore/pom.xml @@ -19,7 +19,7 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-documentstore diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase-tests/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase-tests/pom.xml index 6f2fce097df73..01c24fc2b5c52 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase-tests/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase-tests/pom.xml @@ -22,11 +22,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase-tests - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN TimelineService HBase tests diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/pom.xml index 0d61513889a5f..b845c1c4dde5e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-client/pom.xml @@ -22,7 +22,7 @@ hadoop-yarn-server-timelineservice-hbase org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase-client diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml index 63e3389253cac..b05a342d536fb 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-common/pom.xml @@ -22,13 +22,13 @@ 
hadoop-yarn-server-timelineservice-hbase org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase-common Apache Hadoop YARN TimelineService HBase Common - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-1/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-1/pom.xml index 20fc1fd65f019..7e907207e190f 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-1/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-1/pom.xml @@ -22,13 +22,13 @@ hadoop-yarn-server-timelineservice-hbase-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase-server-1 Apache Hadoop YARN TimelineService HBase Server 1.7 - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-2/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-2/pom.xml index 7daa5782d69a7..2bf91d910c5b0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-2/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/hadoop-yarn-server-timelineservice-hbase-server-2/pom.xml @@ -22,13 +22,13 @@ hadoop-yarn-server-timelineservice-hbase-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase-server-2 Apache Hadoop YARN TimelineService HBase Server 2.2 - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/pom.xml index 02961a6c10d2c..2e47047f2fe78 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/hadoop-yarn-server-timelineservice-hbase-server/pom.xml @@ -22,12 +22,12 @@ hadoop-yarn-server-timelineservice-hbase org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase-server - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN TimelineService HBase Servers pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/pom.xml 
b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/pom.xml index c824202fe6c49..3f9b10cf8a00a 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice-hbase/pom.xml @@ -22,12 +22,12 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice-hbase - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN TimelineService HBase Backend pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/pom.xml index 5a2823ad5eff5..87ab85f0d2fa3 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-timelineservice/pom.xml @@ -22,11 +22,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-timelineservice - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Timeline Service diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml index 15df5456810ce..aaa07ae82594d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-web-proxy/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn-server org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server-web-proxy - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Web Proxy diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml index 83c43feb11407..8ac3aea1d7bb7 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-server - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Server pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml index 827161811f76d..deca038ef33db 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-site/pom.xml @@ -19,11 +19,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-site - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN Site pom diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/pom.xml b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/pom.xml index e18a7eac3387d..830c523bea9a2 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-ui/pom.xml @@ -20,11 +20,11 @@ hadoop-yarn org.apache.hadoop - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT 4.0.0 hadoop-yarn-ui - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop YARN UI ${packagingType} diff --git a/hadoop-yarn-project/hadoop-yarn/pom.xml b/hadoop-yarn-project/hadoop-yarn/pom.xml index e97e35608553a..89abf5d3f2365 100644 --- a/hadoop-yarn-project/hadoop-yarn/pom.xml +++ b/hadoop-yarn-project/hadoop-yarn/pom.xml @@ -17,11 +17,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 
3.4.1-SNAPSHOT ../../hadoop-project hadoop-yarn - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT pom Apache Hadoop YARN diff --git a/hadoop-yarn-project/pom.xml b/hadoop-yarn-project/pom.xml index 241e3bc237a0e..43b34db46d8af 100644 --- a/hadoop-yarn-project/pom.xml +++ b/hadoop-yarn-project/pom.xml @@ -18,11 +18,11 @@ org.apache.hadoop hadoop-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT ../hadoop-project hadoop-yarn-project - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT pom Apache Hadoop YARN Project https://hadoop.apache.org/yarn/ diff --git a/pom.xml b/pom.xml index 13e3aec63efba..ed13757ca4a72 100644 --- a/pom.xml +++ b/pom.xml @@ -18,7 +18,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x 4.0.0 org.apache.hadoop hadoop-main - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT Apache Hadoop Main Apache Hadoop Main pom @@ -80,7 +80,7 @@ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/x - 3.4.0-SNAPSHOT + 3.4.1-SNAPSHOT apache.snapshots.https Apache Development Snapshot Repository From 3bb2ee6af0aa1fb0903b522acbe9bf289ee6cd30 Mon Sep 17 00:00:00 2001 From: hfutatzhanghb Date: Thu, 11 Jan 2024 21:08:37 +0800 Subject: [PATCH 002/164] HDFS-17312. packetsReceived metric should ignore heartbeat packet. (#6394) Signed-off-by: Takanobu Asanuma (cherry picked from commit 6a053765ee12dae35dbd69cb949c999aaedc0643) --- .../apache/hadoop/hdfs/server/datanode/BlockReceiver.java | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java index 4829e8c578635..86ee6bd431ef7 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BlockReceiver.java @@ -38,6 +38,7 @@ import org.apache.hadoop.fs.ChecksumException; import org.apache.hadoop.fs.FSOutputSummer; import org.apache.hadoop.fs.StorageType; +import org.apache.hadoop.hdfs.DFSPacket; import org.apache.hadoop.hdfs.DFSUtilClient; import org.apache.hadoop.hdfs.protocol.DatanodeInfo; import org.apache.hadoop.hdfs.protocol.ExtendedBlock; @@ -598,7 +599,9 @@ private int receivePacket() throws IOException { return 0; } - datanode.metrics.incrPacketsReceived(); + if (seqno != DFSPacket.HEART_BEAT_SEQNO) { + datanode.metrics.incrPacketsReceived(); + } //First write the packet to the mirror: if (mirrorOut != null && !mirrorError) { try { From eb959cb885271aaf3f426caf4bb26965295b8384 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sun, 14 Jan 2024 18:30:40 +0800 Subject: [PATCH 003/164] HADOOP-19034. Fix Download Maven Url Not Found. (#6438). Contributed by Shilun Fan. 
Reviewed-by: Steve Loughran Signed-off-by: He Xiaoqiao (cherry picked from commit 0f8b74b03f7ffc616fec3d4712b4386237628eec) --- dev-support/docker/pkg-resolver/install-maven.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-support/docker/pkg-resolver/install-maven.sh b/dev-support/docker/pkg-resolver/install-maven.sh index d1d0dc97fe5e4..fb7d4a5be77dc 100644 --- a/dev-support/docker/pkg-resolver/install-maven.sh +++ b/dev-support/docker/pkg-resolver/install-maven.sh @@ -40,7 +40,7 @@ fi if [ "$version_to_install" == "3.6.3" ]; then mkdir -p /opt/maven /tmp/maven && - curl -L -s -S https://dlcdn.apache.org/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \ + curl -L -s -S https://repo.maven.apache.org/maven2/org/apache/maven/apache-maven/3.6.3/apache-maven-3.6.3-bin.tar.gz \ -o /tmp/maven/apache-maven-3.6.3-bin.tar.gz && tar xzf /tmp/maven/apache-maven-3.6.3-bin.tar.gz --strip-components 1 -C /opt/maven else From fa80205ad702bab011fe5edee5635bf8a539d5b1 Mon Sep 17 00:00:00 2001 From: Hexiaoqiao Date: Wed, 17 Jan 2024 15:00:06 +0800 Subject: [PATCH 004/164] HADOOP-19031. Enhance access control for RunJar. (#6427). Contributed by He Xiaoqiao. Signed-off-by: Shuyan Zhang Signed-off-by: Shilun Fan Signed-off-by: Ayush Saxena (cherry picked from commit 9634bd31e6594312b68d9e07b736d18d29f7648c) --- .../main/java/org/apache/hadoop/util/RunJar.java | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java index c28e69f54611e..e527f602cdd31 100644 --- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java +++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/util/RunJar.java @@ -28,10 +28,14 @@ import java.net.URL; import java.net.URLClassLoader; import java.nio.file.Files; +import java.nio.file.attribute.FileAttribute; +import java.nio.file.attribute.PosixFilePermission; +import java.nio.file.attribute.PosixFilePermissions; import java.util.ArrayList; import java.util.Arrays; import java.util.Enumeration; import java.util.List; +import java.util.Set; import java.util.jar.JarEntry; import java.util.jar.JarFile; import java.util.jar.JarInputStream; @@ -287,20 +291,18 @@ public void run(String[] args) throws Throwable { final File workDir; try { - workDir = File.createTempFile("hadoop-unjar", "", tmpDir); - } catch (IOException ioe) { + FileAttribute> perms = PosixFilePermissions + .asFileAttribute(PosixFilePermissions.fromString("rwx------")); + workDir = Files.createTempDirectory(tmpDir.toPath(), "hadoop-unjar", perms).toFile(); + } catch (IOException | SecurityException e) { // If user has insufficient perms to write to tmpDir, default // "Permission denied" message doesn't specify a filename. System.err.println("Error creating temp dir in java.io.tmpdir " - + tmpDir + " due to " + ioe.getMessage()); + + tmpDir + " due to " + e.getMessage()); System.exit(-1); return; } - if (!workDir.delete()) { - System.err.println("Delete failed for " + workDir); - System.exit(-1); - } ensureDirectory(workDir); ShutdownHookManager.get().addShutdownHook( From 67d985620fdd3b397634edd5424392358c9e6db0 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Tue, 16 Jan 2024 22:11:16 +0800 Subject: [PATCH 005/164] HADOOP-19040. mvn site commands fails due to MetricsSystem And MetricsSystemImpl changes. 
(#6450) Contributed by Shilun Fan. Reviewed-by: Steve Loughran Signed-off-by: Shilun Fan --- .../hadoop-common/dev-support/jdiff-workaround.patch | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch b/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch index 2bd7b63f0178f..5b6cd3af825b0 100644 --- a/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch +++ b/hadoop-common-project/hadoop-common/dev-support/jdiff-workaround.patch @@ -14,7 +14,7 @@ index a277abd6e13..1d131d5db6e 100644 - * the annotations of the source object.) - * @param desc the description of the source (or null. See above.) - * @return the source object -- * @exception MetricsException +- * @exception MetricsException Metrics Exception. - */ - public abstract T register(String name, String desc, T source); - @@ -38,7 +38,7 @@ index a277abd6e13..1d131d5db6e 100644 + * the annotations of the source object.) + * @param desc the description of the source (or null. See above.) + * @return the source object - * @exception MetricsException + * @exception MetricsException Metrics Exception. */ - public abstract - T register(String name, String desc, T sink); @@ -65,7 +65,6 @@ index a6edf08e5a7..5b87be1ec67 100644 - } - return sink; - } -- allSinks.put(name, sink); - if (config != null) { - registerSink(name, description, sink); - } From a38d902677998dfce2bce4fc8f842cfda477741c Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Mon, 15 Jan 2024 15:44:17 +0800 Subject: [PATCH 006/164] YARN-11634. [Addendum] Speed-up TestTimelineClient. (#6419) Co-authored-by: slfan1989 --- .../client/api/impl/TimelineClientImpl.java | 6 ++++ .../client/api/impl/TimelineConnector.java | 28 +++++++++---------- .../client/api/impl/TestTimelineClient.java | 4 +-- 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java index 2b9ce4fa8f2ad..45da0f444ba0d 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineClientImpl.java @@ -459,4 +459,10 @@ public void putDomain(ApplicationAttemptId appAttemptId, public void setTimelineWriter(TimelineWriter writer) { this.timelineWriter = writer; } + + @Private + @VisibleForTesting + public TimelineConnector getConnector() { + return connector; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineConnector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineConnector.java index b139bddd101e0..dce877f3cb83e 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineConnector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/client/api/impl/TimelineConnector.java @@ -78,8 +78,8 @@ public class TimelineConnector extends AbstractService { private static final Joiner JOINER = Joiner.on(""); private static final Logger LOG = 
LoggerFactory.getLogger(TimelineConnector.class); - @VisibleForTesting - public static int DEFAULT_SOCKET_TIMEOUT = 60_000; // 1 minute + + private int socketTimeOut = 60_000; private SSLFactory sslFactory; Client client; @@ -113,7 +113,7 @@ protected void serviceInit(Configuration conf) throws Exception { sslFactory = getSSLFactory(conf); connConfigurator = getConnConfigurator(sslFactory); } else { - connConfigurator = DEFAULT_TIMEOUT_CONN_CONFIGURATOR; + connConfigurator = defaultTimeoutConnConfigurator; } String defaultAuth = UserGroupInformation.isSecurityEnabled() ? KerberosAuthenticationHandler.TYPE : @@ -140,23 +140,18 @@ protected void serviceInit(Configuration conf) throws Exception { } } - private static final ConnectionConfigurator DEFAULT_TIMEOUT_CONN_CONFIGURATOR - = new ConnectionConfigurator() { - @Override - public HttpURLConnection configure(HttpURLConnection conn) - throws IOException { - setTimeouts(conn, DEFAULT_SOCKET_TIMEOUT); - return conn; - } - }; + private ConnectionConfigurator defaultTimeoutConnConfigurator = conn -> { + setTimeouts(conn, socketTimeOut); + return conn; + }; private ConnectionConfigurator getConnConfigurator(SSLFactory sslFactoryObj) { try { - return initSslConnConfigurator(DEFAULT_SOCKET_TIMEOUT, sslFactoryObj); + return initSslConnConfigurator(socketTimeOut, sslFactoryObj); } catch (Exception e) { LOG.debug("Cannot load customized ssl related configuration. " + "Fallback to system-generic settings.", e); - return DEFAULT_TIMEOUT_CONN_CONFIGURATOR; + return defaultTimeoutConnConfigurator; } } @@ -457,4 +452,9 @@ public boolean shouldRetryOn(Exception e) { || e instanceof SocketTimeoutException); } } + + @VisibleForTesting + public void setSocketTimeOut(int socketTimeOut) { + this.socketTimeOut = socketTimeOut; + } } diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java index cac620f669521..80e425e4853d0 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/client/api/impl/TestTimelineClient.java @@ -78,7 +78,7 @@ public void setup() { conf.setBoolean(YarnConfiguration.TIMELINE_SERVICE_ENABLED, true); conf.setFloat(YarnConfiguration.TIMELINE_SERVICE_VERSION, 1.0f); client = createTimelineClient(conf); - TimelineConnector.DEFAULT_SOCKET_TIMEOUT = 10; + client.getConnector().setSocketTimeOut(10); } @AfterEach @@ -89,7 +89,7 @@ public void tearDown() throws Exception { if (isSSLConfigured()) { KeyStoreTestUtil.cleanupSSLConfig(keystoresDir, sslConfDir); } - TimelineConnector.DEFAULT_SOCKET_TIMEOUT = 60_000; + client.getConnector().setSocketTimeOut(60_000); } @Test From 76887c1b4978d7bb092ad2c1897f6f2c4d369a69 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Sat, 20 Jan 2024 07:51:55 +0800 Subject: [PATCH 007/164] Revert "HDFS-16016. BPServiceActor to provide new thread to handle IBR (#2998)" (#6457) Contributed by Shilun Fan. This reverts commit c1bf3cb0. 
Reviewed-by: Takanobu Asanuma Reviewed-by: He Xiaoqiao Reviewed-by: Ayush Saxena Reviewed-by: Viraj Jasani Signed-off-by: Shilun Fan --- .../hdfs/server/datanode/BPServiceActor.java | 62 +++---------------- .../hadoop/hdfs/TestDatanodeReport.java | 17 +---- .../datanode/TestIncrementalBlockReports.java | 24 ++----- 3 files changed, 17 insertions(+), 86 deletions(-) diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java index b552fa277d049..4bac0d8fb47fd 100755 --- a/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/BPServiceActor.java @@ -36,8 +36,6 @@ import java.util.TreeSet; import java.util.concurrent.BlockingQueue; import java.util.concurrent.CountDownLatch; -import java.util.concurrent.ExecutorService; -import java.util.concurrent.Executors; import java.util.concurrent.LinkedBlockingQueue; import java.util.concurrent.ThreadLocalRandom; import java.util.concurrent.atomic.AtomicBoolean; @@ -73,7 +71,6 @@ import org.apache.hadoop.io.IOUtils; import org.apache.hadoop.ipc.RemoteException; import org.apache.hadoop.net.NetUtils; -import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder; import org.apache.hadoop.util.Preconditions; import org.apache.hadoop.util.Time; import org.apache.hadoop.util.VersionInfo; @@ -103,8 +100,6 @@ class BPServiceActor implements Runnable { volatile long lastCacheReport = 0; private final Scheduler scheduler; - private final Object sendIBRLock; - private final ExecutorService ibrExecutorService; Thread bpThread; DatanodeProtocolClientSideTranslatorPB bpNamenode; @@ -161,10 +156,6 @@ enum RunningState { } commandProcessingThread = new CommandProcessingThread(this); commandProcessingThread.start(); - sendIBRLock = new Object(); - ibrExecutorService = Executors.newSingleThreadExecutor( - new ThreadFactoryBuilder().setDaemon(true) - .setNameFormat("ibr-executor-%d").build()); } public DatanodeRegistration getBpRegistration() { @@ -397,10 +388,8 @@ List blockReport(long fullBrLeaseId) throws IOException { // we have a chance that we will miss the delHint information // or we will report an RBW replica after the BlockReport already reports // a FINALIZED one. - synchronized (sendIBRLock) { - ibrManager.sendIBRs(bpNamenode, bpRegistration, - bpos.getBlockPoolId(), getRpcMetricSuffix()); - } + ibrManager.sendIBRs(bpNamenode, bpRegistration, + bpos.getBlockPoolId(), getRpcMetricSuffix()); long brCreateStartTime = monotonicNow(); Map perVolumeBlockLists = @@ -633,9 +622,6 @@ void stop() { if (commandProcessingThread != null) { commandProcessingThread.interrupt(); } - if (ibrExecutorService != null && !ibrExecutorService.isShutdown()) { - ibrExecutorService.shutdownNow(); - } } //This must be called only by blockPoolManager @@ -650,18 +636,13 @@ void join() { } catch (InterruptedException ie) { } } - // Cleanup method to be called by current thread before exiting. - // Any Thread / ExecutorService started by BPServiceActor can be shutdown - // here. + //Cleanup method to be called by current thread before exiting. 
private synchronized void cleanUp() { shouldServiceRun = false; IOUtils.cleanupWithLogger(null, bpNamenode); IOUtils.cleanupWithLogger(null, lifelineSender); bpos.shutdownActor(this); - if (!ibrExecutorService.isShutdown()) { - ibrExecutorService.shutdownNow(); - } } private void handleRollingUpgradeStatus(HeartbeatResponse resp) throws IOException { @@ -757,6 +738,11 @@ private void offerService() throws Exception { isSlownode = resp.getIsSlownode(); } } + if (!dn.areIBRDisabledForTests() && + (ibrManager.sendImmediately()|| sendHeartbeat)) { + ibrManager.sendIBRs(bpNamenode, bpRegistration, + bpos.getBlockPoolId(), getRpcMetricSuffix()); + } List cmds = null; boolean forceFullBr = @@ -923,10 +909,6 @@ public void run() { initialRegistrationComplete.countDown(); } - // IBR tasks to be handled separately from offerService() in order to - // improve performance of offerService(), which can now focus only on - // FBR and heartbeat. - ibrExecutorService.submit(new IBRTaskHandler()); while (shouldRun()) { try { offerService(); @@ -1159,34 +1141,6 @@ private void sendLifeline() throws IOException { } } - class IBRTaskHandler implements Runnable { - - @Override - public void run() { - LOG.info("Starting IBR Task Handler."); - while (shouldRun()) { - try { - final long startTime = scheduler.monotonicNow(); - final boolean sendHeartbeat = scheduler.isHeartbeatDue(startTime); - if (!dn.areIBRDisabledForTests() && - (ibrManager.sendImmediately() || sendHeartbeat)) { - synchronized (sendIBRLock) { - ibrManager.sendIBRs(bpNamenode, bpRegistration, - bpos.getBlockPoolId(), getRpcMetricSuffix()); - } - } - // There is no work to do; sleep until heartbeat timer elapses, - // or work arrives, and then iterate again. - ibrManager.waitTillNextIBR(scheduler.getHeartbeatWaitTime()); - } catch (Throwable t) { - LOG.error("Exception in IBRTaskHandler.", t); - sleepAndLogInterrupts(5000, "offering IBR service"); - } - } - } - - } - /** * Utility class that wraps the timestamp computations for scheduling * heartbeats and block reports. 
diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java index 239555a8b0065..a844e1727b0a9 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/TestDatanodeReport.java @@ -172,19 +172,8 @@ public void testDatanodeReportMissingBlock() throws Exception { // all bad datanodes } cluster.triggerHeartbeats(); // IBR delete ack - int retries = 0; - while (true) { - lb = fs.getClient().getLocatedBlocks(p.toString(), 0).get(0); - if (0 != lb.getLocations().length) { - retries++; - if (retries > 7) { - Assert.fail("getLocatedBlocks failed after 7 retries"); - } - Thread.sleep(2000); - } else { - break; - } - } + lb = fs.getClient().getLocatedBlocks(p.toString(), 0).get(0); + assertEquals(0, lb.getLocations().length); } finally { cluster.shutdown(); } @@ -234,4 +223,4 @@ static DataNode findDatanode(String id, List datanodes) { throw new IllegalStateException("Datnode " + id + " not in datanode list: " + datanodes); } -} +} \ No newline at end of file diff --git a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java index e848cbfb37ffb..4221ecaf2a064 100644 --- a/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java +++ b/hadoop-hdfs-project/hadoop-hdfs/src/test/java/org/apache/hadoop/hdfs/server/datanode/TestIncrementalBlockReports.java @@ -25,7 +25,6 @@ import java.io.IOException; -import org.mockito.exceptions.base.MockitoAssertionError; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -157,7 +156,7 @@ public void testReportBlockDeleted() throws InterruptedException, IOException { // Sleep for a very short time since IBR is generated // asynchronously. - Thread.sleep(1000); + Thread.sleep(2000); // Ensure that no block report was generated immediately. // Deleted blocks are reported when the IBR timer elapses. @@ -168,24 +167,13 @@ public void testReportBlockDeleted() throws InterruptedException, IOException { // Trigger a heartbeat, this also triggers an IBR. DataNodeTestUtils.triggerHeartbeat(singletonDn); + Thread.sleep(2000); // Ensure that the deleted block is reported. - int retries = 0; - while (true) { - try { - Mockito.verify(nnSpy, atLeastOnce()).blockReceivedAndDeleted( - any(DatanodeRegistration.class), - anyString(), - any(StorageReceivedDeletedBlocks[].class)); - break; - } catch (MockitoAssertionError e) { - if (retries > 7) { - throw e; - } - retries++; - Thread.sleep(2000); - } - } + Mockito.verify(nnSpy, times(1)).blockReceivedAndDeleted( + any(DatanodeRegistration.class), + anyString(), + any(StorageReceivedDeletedBlocks[].class)); } finally { cluster.shutdown(); From a8818989fddb4ad53c6de969b50c98fc46bb911d Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 18 Jan 2024 19:12:12 +0800 Subject: [PATCH 008/164] HADOOP-19038. Improve create-release RUN script. (#6448) Contributed by Shilun Fan. 
Reviewed-by: Steve Loughran Reviewed-by: He Xiaoqiao Signed-off-by: Shilun Fan --- dev-support/bin/create-release | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release index 693b41c4f3910..274250f0b7134 100755 --- a/dev-support/bin/create-release +++ b/dev-support/bin/create-release @@ -504,9 +504,9 @@ function dockermode echo "LABEL org.apache.hadoop.create-release=\"cr-${RANDOM}\"" # setup ownerships, etc - echo "RUN groupadd --non-unique -g ${group_id} ${user_name}" - echo "RUN useradd -g ${group_id} -u ${user_id} -m ${user_name}" - echo "RUN chown -R ${user_name} /home/${user_name}" + echo "RUN groupadd --non-unique -g ${group_id} ${user_name}; exit 0;" + echo "RUN useradd -g ${group_id} -u ${user_id} -m ${user_name}; exit 0;" + echo "RUN chown -R ${user_name} /home/${user_name}; exit 0;" echo "ENV HOME /home/${user_name}" echo "RUN mkdir -p /maven" echo "RUN chown -R ${user_name} /maven" From 0898b08d68c4eb1cab6006a4774ff647fd1b7bed Mon Sep 17 00:00:00 2001 From: PJ Fanning Date: Sun, 21 Jan 2024 01:13:25 +0100 Subject: [PATCH 009/164] HADOOP-18894: upgrade sshd-core due to CVEs (#6060) Contributed by PJ Fanning. Reviewed-by: He Xiaoqiao Reviewed-by: Steve Loughran Signed-off-by: Shilun Fan --- LICENSE-binary | 3 +++ hadoop-common-project/hadoop-common/pom.xml | 5 +++++ .../hadoop/fs/contract/sftp/SFTPContract.java | 7 +++---- .../hadoop/fs/sftp/TestSFTPFileSystem.java | 18 +++++++----------- hadoop-project/pom.xml | 8 +++++++- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 1ebc44b0580a3..3720a78095635 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -335,6 +335,9 @@ org.apache.kerby:kerby-pkix:2.0.3 org.apache.kerby:kerby-util:2.0.3 org.apache.kerby:kerby-xdr:2.0.3 org.apache.kerby:token-provider:2.0.3 +org.apache.sshd:sshd-common:2.11.0 +org.apache.sshd:sshd-core:2.11.0 +org.apache.sshd:sshd-sftp:2.11.0 org.apache.solr:solr-solrj:8.11.2 org.apache.yetus:audience-annotations:0.5.0 org.apache.zookeeper:zookeeper:3.8.3 diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml index 8f35d3a442f7e..e1d1683d7278a 100644 --- a/hadoop-common-project/hadoop-common/pom.xml +++ b/hadoop-common-project/hadoop-common/pom.xml @@ -316,6 +316,11 @@ sshd-core test + + org.apache.sshd + sshd-sftp + test + org.apache.ftpserver ftpserver-core diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java index f72a2aec86242..631c89586514a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/contract/sftp/SFTPContract.java @@ -31,12 +31,11 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.AbstractFSContract; import org.apache.hadoop.fs.sftp.SFTPFileSystem; -import org.apache.sshd.common.NamedFactory; import org.apache.sshd.server.SshServer; -import org.apache.sshd.server.auth.UserAuth; +import org.apache.sshd.server.auth.UserAuthFactory; import org.apache.sshd.server.auth.password.UserAuthPasswordFactory; import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider; -import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory; +import org.apache.sshd.sftp.server.SftpSubsystemFactory; 
public class SFTPContract extends AbstractFSContract { @@ -61,7 +60,7 @@ public void init() throws IOException { sshd.setPort(0); sshd.setKeyPairProvider(new SimpleGeneratorHostKeyProvider()); - List> userAuthFactories = new ArrayList<>(); + List userAuthFactories = new ArrayList<>(); userAuthFactories.add(new UserAuthPasswordFactory()); sshd.setUserAuthFactories(userAuthFactories); diff --git a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java index e8ba5f211eb8d..e425c2dea284a 100644 --- a/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java +++ b/hadoop-common-project/hadoop-common/src/test/java/org/apache/hadoop/fs/sftp/TestSFTPFileSystem.java @@ -22,7 +22,7 @@ import java.nio.file.Files; import java.nio.file.attribute.BasicFileAttributes; import java.util.ArrayList; -import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configuration; @@ -35,18 +35,13 @@ import org.apache.hadoop.test.GenericTestUtils; import static org.apache.hadoop.test.PlatformAssumptions.assumeNotWindows; -import org.apache.sshd.common.NamedFactory; -import org.apache.sshd.server.Command; import org.apache.sshd.server.SshServer; -import org.apache.sshd.server.auth.UserAuth; +import org.apache.sshd.server.auth.UserAuthFactory; import org.apache.sshd.server.auth.password.PasswordAuthenticator; import org.apache.sshd.server.auth.password.UserAuthPasswordFactory; import org.apache.sshd.server.keyprovider.SimpleGeneratorHostKeyProvider; import org.apache.sshd.server.session.ServerSession; -import org.apache.sshd.server.subsystem.sftp.SftpSubsystemFactory; - -import org.junit.After; -import org.junit.AfterClass; +import org.apache.sshd.sftp.server.SftpSubsystemFactory; import static org.assertj.core.api.Assertions.assertThat; import static org.junit.Assert.assertArrayEquals; @@ -54,6 +49,8 @@ import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import org.junit.After; +import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Rule; @@ -82,8 +79,7 @@ private static void startSshdServer() throws IOException { sshd.setPort(0); sshd.setKeyPairProvider(new SimpleGeneratorHostKeyProvider()); - List> userAuthFactories = - new ArrayList>(); + List userAuthFactories = new ArrayList<>(); userAuthFactories.add(new UserAuthPasswordFactory()); sshd.setUserAuthFactories(userAuthFactories); @@ -100,7 +96,7 @@ public boolean authenticate(String username, String password, }); sshd.setSubsystemFactories( - Arrays.>asList(new SftpSubsystemFactory())); + Collections.singletonList(new SftpSubsystemFactory())); sshd.start(); port = sshd.getPort(); diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 65f2c0b0e5d5b..6971960de49c2 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -202,6 +202,7 @@ 1.5.4 2.0 + 2.11.0 1.7.1 2.2.4 4.13.2 @@ -1133,7 +1134,12 @@ org.apache.sshd sshd-core - 1.6.0 + ${sshd.version} + + + org.apache.sshd + sshd-sftp + ${sshd.version} org.apache.ftpserver From a013f06fe5c7e9b024a4e04a42699cb2f205f210 Mon Sep 17 00:00:00 2001 From: Ayush Saxena Date: Mon, 22 Jan 2024 12:20:27 +0530 Subject: [PATCH 010/164] Revert "HADOOP-18823. Add Labeler Github Action. (#5874). Contributed by Ayush Saxena." 
This reverts commit c04a17f1160e3dedcdf294d09f878136af75172a. Reverted from Branch-3.4, since this commit is relevant only for trunk. --- .github/labeler.yml | 57 ----------------------------------- .github/workflows/labeler.yml | 40 ------------------------ 2 files changed, 97 deletions(-) delete mode 100755 .github/labeler.yml delete mode 100644 .github/workflows/labeler.yml diff --git a/.github/labeler.yml b/.github/labeler.yml deleted file mode 100755 index a3fa437e0de2a..0000000000000 --- a/.github/labeler.yml +++ /dev/null @@ -1,57 +0,0 @@ -# -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -# Pull Request Labeler Github Action Configuration: https://github.com/marketplace/actions/labeler - -trunk: - - '**' -INFRA: - - .asf.yaml - - .gitattributes - - .gitignore - - .github/** - - dev-support/** - - start-build-env.sh -BUILD: - - '**/pom.xml' -COMMON: - - hadoop-common-project/** -HDFS: - - hadoop-hdfs-project/** -RBF: - - hadoop-hdfs-project/hadoop-hdfs-rbf/** -NATIVE: - - hadoop-hdfs-project/hadoop-hdfs-native-client/** - - hadoop-common-project/hadoop-common/src/main/native/** -YARN: - - hadoop-yarn-project/** -MAPREDUCE: - - hadoop-mapreduce-project/** -DISTCP: - - hadoop-tools/hadoop-distcp/** -TOOLS: - - hadoop-tools/** -AWS: - - hadoop-tools/hadoop-aws/** -ABFS: - - hadoop-tools/hadoop-azure/** -DYNAMOMETER: - - hadoop-tools/hadoop-dynamometer/** -MAVEN-PLUGINS: - - hadoop-maven-plugins/** diff --git a/.github/workflows/labeler.yml b/.github/workflows/labeler.yml deleted file mode 100644 index f85aff05dda67..0000000000000 --- a/.github/workflows/labeler.yml +++ /dev/null @@ -1,40 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. 
-# - -name: "Pull Request Labeler" -on: pull_request_target - -permissions: - contents: read - pull-requests: write - -jobs: - triage: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v3 - with: - sparse-checkout: | - .github - - uses: actions/labeler@v4.3.0 - with: - repo-token: ${{ secrets.GITHUB_TOKEN }} - sync-labels: true - configuration-path: .github/labeler.yml - dot: true \ No newline at end of file From 965cb913c39712ebd5e0aea7fc0f22803dd536a0 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 11 Jan 2024 17:13:31 +0000 Subject: [PATCH 011/164] HADOOP-19004. S3A: Support Authentication through HttpSigner API (#6324) Move to the new auth flow based signers for aws. * Implement a new Signer Initialization Chain * Add a new instantiation method * Add a new test * Fix Reflection Code for SignerInitialization Contributed by Harshit Gupta --- .../org/apache/hadoop/fs/s3a/Constants.java | 16 ++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 20 ++- .../hadoop/fs/s3a/auth/CustomHttpSigner.java | 70 ++++++++ .../hadoop/fs/s3a/auth/SignerFactory.java | 68 ++++++++ .../hadoop/fs/s3a/impl/AWSClientConfig.java | 1 + .../hadoop/fs/s3a/impl/InternalConstants.java | 6 + .../hadoop/fs/s3a/auth/ITestHttpSigner.java | 151 ++++++++++++++++++ 7 files changed, 330 insertions(+), 2 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestHttpSigner.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index fb4f22cedb9ba..c1c12b5948284 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1543,4 +1543,20 @@ private Constants() { * Value: {@value}. */ public static final boolean S3EXPRESS_CREATE_SESSION_DEFAULT = true; + + /** + * Flag to switch to a v2 SDK HTTP signer. Value {@value}. + */ + public static final String HTTP_SIGNER_ENABLED = "fs.s3a.http.signer.enabled"; + + /** + * Default value of {@link #HTTP_SIGNER_ENABLED}: {@value}. + */ + public static final boolean HTTP_SIGNER_ENABLED_DEFAULT = false; + + /** + * Classname of the http signer to use when {@link #HTTP_SIGNER_ENABLED} + * is true: {@value}. 
+ */ + public static final String HTTP_SIGNER_CLASS_NAME = "fs.s3a.http.signer.class"; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 66e8d60689a8a..05ac5ef921c95 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -32,7 +32,9 @@ import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import software.amazon.awssdk.core.retry.RetryPolicy; import software.amazon.awssdk.http.apache.ApacheHttpClient; +import software.amazon.awssdk.http.auth.spi.scheme.AuthScheme; import software.amazon.awssdk.http.nio.netty.NettyNioAsyncHttpClient; +import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity; import software.amazon.awssdk.regions.Region; import software.amazon.awssdk.services.s3.S3AsyncClient; import software.amazon.awssdk.services.s3.S3BaseClientBuilder; @@ -52,10 +54,15 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; -import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT; import static org.apache.hadoop.fs.s3a.Constants.DEFAULT_SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.SECURE_CONNECTIONS; import static org.apache.hadoop.fs.s3a.Constants.AWS_SERVICE_IDENTIFIER_S3; +import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner; +import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4; /** @@ -165,11 +172,19 @@ private , ClientT> Build .pathStyleAccessEnabled(parameters.isPathStyleAccess()) .build(); - return builder + S3BaseClientBuilder s3BaseClientBuilder = builder .overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) .credentialsProvider(parameters.getCredentialSet()) .disableS3ExpressSessionAuth(!parameters.isExpressCreateSession()) .serviceConfiguration(serviceConfiguration); + + if (conf.getBoolean(HTTP_SIGNER_ENABLED, HTTP_SIGNER_ENABLED_DEFAULT)) { + // use an http signer through an AuthScheme + final AuthScheme signer = + createHttpSigner(conf, AUTH_SCHEME_AWS_SIGV_4, HTTP_SIGNER_CLASS_NAME); + builder.putAuthScheme(signer); + } + return (BuilderT) s3BaseClientBuilder; } /** @@ -177,6 +192,7 @@ private , ClientT> Build * @param parameters parameter object * @param conf configuration object * @throws IOException any IOE raised, or translated exception + * @throws RuntimeException some failures creating an http signer * @return the override configuration */ protected ClientOverrideConfiguration createClientOverrideConfiguration( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java new file mode 100644 index 0000000000000..ba1169a5e5987 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java @@ -0,0 +1,70 @@ +/* 
+ * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.auth; + +import java.util.concurrent.CompletableFuture; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.http.auth.aws.signer.AwsV4HttpSigner; +import software.amazon.awssdk.http.auth.spi.signer.AsyncSignRequest; +import software.amazon.awssdk.http.auth.spi.signer.AsyncSignedRequest; +import software.amazon.awssdk.http.auth.spi.signer.HttpSigner; +import software.amazon.awssdk.http.auth.spi.signer.SignRequest; +import software.amazon.awssdk.http.auth.spi.signer.SignedRequest; +import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity; + +/** + * Custom signer that delegates to the AWS V4 signer. + * Logs at TRACE the string value of any request. + * This is in the production code to support testing the signer plugin mechansim. + * To use + *
+ *   fs.s3a.http.signer.enabled = true
+ *   fs.s3a.http.signer.class = org.apache.hadoop.fs.s3a.auth.CustomHttpSigner
+ * 
+ */ +public final class CustomHttpSigner implements HttpSigner { + private static final Logger LOG = LoggerFactory + .getLogger(CustomHttpSigner.class); + + /** + * The delegate signer. + */ + private final HttpSigner delegateSigner; + + public CustomHttpSigner() { + delegateSigner = AwsV4HttpSigner.create(); + } + + @Override + public SignedRequest sign(SignRequest + request) { + LOG.trace("Signing request:{}", request.request()); + return delegateSigner.sign(request); + } + + @Override + public CompletableFuture signAsync( + final AsyncSignRequest request) { + + LOG.trace("Signing async request:{}", request.request()); + return delegateSigner.signAsync(request); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java index 21c390c07940b..e46fd88e85f89 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java @@ -29,12 +29,20 @@ import software.amazon.awssdk.auth.signer.AwsS3V4Signer; import software.amazon.awssdk.core.signer.NoOpSigner; import software.amazon.awssdk.core.signer.Signer; +import software.amazon.awssdk.http.auth.spi.scheme.AuthScheme; +import software.amazon.awssdk.http.auth.spi.signer.HttpSigner; +import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity; +import software.amazon.awssdk.identity.spi.IdentityProvider; +import software.amazon.awssdk.identity.spi.IdentityProviders; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.unavailable; import static org.apache.hadoop.util.Preconditions.checkArgument; +import static org.apache.hadoop.util.Preconditions.checkState; /** * Signer factory used to register and create signers. @@ -119,4 +127,64 @@ public static Signer createSigner(String signerType, String configKey) throws IO return signer; } + + /** + * Create an auth scheme instance from an ID and a signer. + * @param schemeId scheme id + * @param signer signer + * @return the auth scheme + */ + public static AuthScheme createAuthScheme( + String schemeId, + HttpSigner signer) { + + return new AuthScheme() { + @Override + public String schemeId() { + return schemeId; + } + @Override + public IdentityProvider identityProvider( + IdentityProviders providers) { + return providers.identityProvider(AwsCredentialsIdentity.class); + } + @Override + public HttpSigner signer() { + return signer; + } + }; + } + + /** + * Create an auth scheme by looking up the signer class in the configuration, + * loading and instantiating it. 
+ * @param conf configuration + * @param scheme scheme to bond to + * @param configKey configuration key + * @return the auth scheme + * @throws InstantiationIOException failure to instantiate + * @throws IllegalStateException if the signer class is not defined + * @throws RuntimeException other configuration problems + */ + public static AuthScheme createHttpSigner( + Configuration conf, String scheme, String configKey) throws IOException { + + final Class clazz = conf.getClass(HTTP_SIGNER_CLASS_NAME, + null, HttpSigner.class); + checkState(clazz != null, "No http signer class defined in %s", configKey); + LOG.debug("Creating http signer {} from {}", clazz, configKey); + try { + return createAuthScheme(scheme, clazz.newInstance()); + + } catch (InstantiationException | IllegalAccessException e) { + throw new InstantiationIOException( + InstantiationIOException.Kind.InstantiationFailure, + null, + clazz.getName(), + HTTP_SIGNER_CLASS_NAME, + e.toString(), + e); + } + } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index 263562fe8a704..f6da9d84e0a77 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -105,6 +105,7 @@ private AWSClientConfig() { * @param awsServiceIdentifier service * @return the builder inited with signer, timeouts and UA. * @throws IOException failure. + * @throws RuntimeException some failures creating an http signer */ public static ClientOverrideConfiguration.Builder createClientConfigBuilder(Configuration conf, String awsServiceIdentifier) throws IOException { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index cd78350a5d024..1148f6fcd4831 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -286,4 +286,10 @@ private InternalConstants() { FS_S3A_CREATE_PERFORMANCE_ENABLED, DIRECTORY_OPERATIONS_PURGE_UPLOADS, ENABLE_MULTI_DELETE)); + + /** + * AWS V4 Auth Scheme to use when creating signers: {@value}. + */ + public static final String AUTH_SCHEME_AWS_SIGV_4 = "aws.auth#sigv4"; + } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestHttpSigner.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestHttpSigner.java new file mode 100644 index 0000000000000..db0aaa6be0eca --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestHttpSigner.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.auth; + +import java.io.IOException; +import java.security.PrivilegedExceptionAction; + +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.ContentSummary; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; +import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; +import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.security.UserGroupInformation; + +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_SIGNERS; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_S3; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED; +import static org.apache.hadoop.fs.s3a.MultipartTestUtils.createMagicFile; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; + +/** + * Test the HTTP signer SPI. + * Two different UGIs are created; ths simplifies cleanup. + */ +public class ITestHttpSigner extends AbstractS3ATestBase { + private static final Logger LOG = LoggerFactory + .getLogger(ITestHttpSigner.class); + + private static final String TEST_ID_KEY = "TEST_ID_KEY"; + private static final String TEST_REGION_KEY = "TEST_REGION_KEY"; + + private final UserGroupInformation ugi1 = UserGroupInformation.createRemoteUser("user1"); + + private final UserGroupInformation ugi2 = UserGroupInformation.createRemoteUser("user2"); + + private String regionName; + + private String endpoint; + + @Override + public void setup() throws Exception { + super.setup(); + final S3AFileSystem fs = getFileSystem(); + final Configuration conf = fs.getConf(); + // determine the endpoint -skipping the test. + endpoint = conf.getTrimmed(Constants.ENDPOINT, Constants.CENTRAL_ENDPOINT); + LOG.debug("Test endpoint is {}", endpoint); + regionName = conf.getTrimmed(Constants.AWS_REGION, ""); + if (regionName.isEmpty()) { + regionName = determineRegion(fs.getBucket()); + } + LOG.debug("Determined region name to be [{}] for bucket [{}]", regionName, + fs.getBucket()); + } + + private String determineRegion(String bucketName) throws IOException { + return getS3AInternals().getBucketLocation(bucketName); + } + + @Override + public void teardown() throws Exception { + super.teardown(); + FileSystem.closeAllForUGI(ugi1); + FileSystem.closeAllForUGI(ugi2); + } + + private Configuration createTestConfig(String identifier) { + Configuration conf = createConfiguration(); + + removeBaseAndBucketOverrides(conf, + CUSTOM_SIGNERS, + SIGNING_ALGORITHM_S3); + + conf.setBoolean(HTTP_SIGNER_ENABLED, true); + conf.set(HTTP_SIGNER_CLASS_NAME, CustomHttpSigner.class.getName()); + + conf.set(TEST_ID_KEY, identifier); + conf.set(TEST_REGION_KEY, regionName); + + // make absolutely sure there is no caching. 
+ disableFilesystemCaching(conf); + + return conf; + } + + @Test + public void testCustomSignerAndInitializer() + throws IOException, InterruptedException { + + final Path basePath = path(getMethodName()); + FileSystem fs1 = runStoreOperationsAndVerify(ugi1, + new Path(basePath, "customsignerpath1"), "id1"); + + FileSystem fs2 = runStoreOperationsAndVerify(ugi2, + new Path(basePath, "customsignerpath2"), "id2"); + } + + private S3AFileSystem runStoreOperationsAndVerify(UserGroupInformation ugi, + Path finalPath, String identifier) + throws IOException, InterruptedException { + Configuration conf = createTestConfig(identifier); + return ugi.doAs((PrivilegedExceptionAction) () -> { + S3AFileSystem fs = (S3AFileSystem)finalPath.getFileSystem(conf); + + fs.mkdirs(finalPath); + + // now do some more operations to make sure all is good. + final Path subdir = new Path(finalPath, "year=1970/month=1/day=1"); + fs.mkdirs(subdir); + + final Path file1 = new Path(subdir, "file1"); + ContractTestUtils.touch(fs, new Path(subdir, "file1")); + fs.listStatus(subdir); + fs.delete(file1, false); + ContractTestUtils.touch(fs, new Path(subdir, "file1")); + + // create a magic file. + createMagicFile(fs, subdir); + ContentSummary summary = fs.getContentSummary(finalPath); + fs.getS3AInternals().abortMultipartUploads(subdir); + fs.rename(subdir, new Path(finalPath, "renamed")); + fs.delete(finalPath, true); + return fs; + }); + } +} From 8c1bc42bf0789a8685545529209930fd67ef112a Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 16 Jan 2024 14:14:03 +0000 Subject: [PATCH 012/164] HADOOP-19027. S3A: S3AInputStream doesn't recover from HTTP/channel exceptions (#6425) Differentiate from "EOF out of range/end of GET" from "EOF channel problems" through two different subclasses of EOFException and input streams to always retry on http channel errors; out of range GET requests are not retried. Currently an EOFException is always treated as a fail-fast call in read() This allows for all existing external code catching EOFException to handle both, but S3AInputStream to cleanly differentiate range errors (map to -1) from channel errors (retry) - HttpChannelEOFException is subclass of EOFException, so all code which catches EOFException is still happy. retry policy: connectivityFailure - RangeNotSatisfiableEOFException is the subclass of EOFException raised on 416 GET range errors. retry policy: fail - Method ErrorTranslation.maybeExtractChannelException() to create this from shaded/unshaded NoHttpResponseException, using string match to avoid classpath problems. - And do this for SdkClientExceptions with OpenSSL error code WFOPENSSL0035. We believe this is the OpenSSL equivalent. - ErrorTranslation.maybeExtractIOException() to perform this translation as appropriate. S3AInputStream.reopen() code retries on EOF, except on RangeNotSatisfiableEOFException, which is converted to a -1 response to the caller as is done historically. S3AInputStream knows to handle these with read(): HttpChannelEOFException: stream aborting close then retry lazySeek(): Map RangeNotSatisfiableEOFException to -1, but do not map any other EOFException class raised. This means that * out of range reads map to -1 * channel problems in reopen are retried * channel problems in read() abort the failed http connection so it isn't recycled Tests for this using/abusing mocking. Testing through actually raising 416 exceptions and verifying that readFully(), char read() and vector reads are all good. 
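As an editorial illustration of the caller-visible behaviour described above (a sketch only, not part of the patch; the wrapper class and method names are invented), a read path can now treat the two EOF cases separately: an out-of-range GET surfaces as a -1 return from read(), while a broken HTTP channel only surfaces as HttpChannelEOFException once the stream's retry policy has given up.

import java.io.IOException;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.s3a.HttpChannelEOFException;

public final class ReadOutcomeSketch {
  private ReadOutcomeSketch() {
  }

  /**
   * Read one byte, distinguishing the two EOF conditions.
   * @param in a stream opened against an S3A path
   * @return the byte read, or -1 when the position is past the
   *     (possibly declared) end of the object
   * @throws IOException channel failure after retries were exhausted, or any other error
   */
  public static int readOneByte(FSDataInputStream in) throws IOException {
    try {
      // a 416 "range not satisfiable" response is downgraded to -1 inside read()
      return in.read();
    } catch (HttpChannelEOFException e) {
      // the channel broke and the retry policy gave up; as this is still an
      // EOFException, existing catch blocks continue to compile and work
      throw e;
    }
  }
}
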
There is no attempt to recover within a readFully(); there's a boolean constant switch to turn this on, but if anyone does it a test will spin forever as the inner PositionedReadable.read(position, buffer, len) downgrades all EOF exceptions to -1. A new method would need to be added which controls whether to downgrade/rethrow exceptions. What does that mean? Possibly reduced resilience to non-retried failures on the inner stream, even though more channel exceptions are retried on. Contributed by Steve Loughran --- .../fs/s3a/HttpChannelEOFException.java | 42 +++ .../org/apache/hadoop/fs/s3a/Invoker.java | 2 +- .../s3a/RangeNotSatisfiableEOFException.java | 39 +++ .../apache/hadoop/fs/s3a/S3AInputStream.java | 83 ++++-- .../apache/hadoop/fs/s3a/S3ARetryPolicy.java | 13 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 22 +- .../fs/s3a/audit/AWSRequestAnalyzer.java | 7 +- .../auth/IAMInstanceCredentialsProvider.java | 3 +- .../hadoop/fs/s3a/impl/ErrorTranslation.java | 97 ++++++- .../s3a/ITestS3AContractVectoredRead.java | 57 +++- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 59 ++++ .../fs/s3a/TestS3AExceptionTranslation.java | 125 ++++++++- .../fs/s3a/TestS3AInputStreamRetry.java | 242 ++++++++++++---- .../fs/s3a/impl/TestErrorTranslation.java | 10 +- .../fs/s3a/performance/ITestS3AOpenCost.java | 258 +++++++++++++++--- 15 files changed, 912 insertions(+), 147 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/HttpChannelEOFException.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RangeNotSatisfiableEOFException.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/HttpChannelEOFException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/HttpChannelEOFException.java new file mode 100644 index 0000000000000..665d485d7ee54 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/HttpChannelEOFException.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.EOFException; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Http channel exception; subclass of EOFException. + * In particular: + * - NoHttpResponseException + * - OpenSSL errors + * The http client library exceptions may be shaded/unshaded; this is the + * exception used in retry policies. 
+ */ +@InterfaceAudience.Private +public class HttpChannelEOFException extends EOFException { + + public HttpChannelEOFException(final String path, + final String error, + final Throwable cause) { + super(error); + initCause(cause); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java index 9b2c95a90c76f..286e4e00a4678 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Invoker.java @@ -478,7 +478,7 @@ public T retryUntranslated( if (caught instanceof IOException) { translated = (IOException) caught; } else { - translated = S3AUtils.translateException(text, "", + translated = S3AUtils.translateException(text, "/", (SdkException) caught); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RangeNotSatisfiableEOFException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RangeNotSatisfiableEOFException.java new file mode 100644 index 0000000000000..4c6b9decb0b4d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/RangeNotSatisfiableEOFException.java @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.EOFException; + +import org.apache.hadoop.classification.InterfaceAudience; + +/** + * Status code 416, range not satisfiable. + * Subclass of {@link EOFException} so that any code which expects that to + * be the outcome of a 416 failure will continue to work. + */ +@InterfaceAudience.Private +public class RangeNotSatisfiableEOFException extends EOFException { + + public RangeNotSatisfiableEOFException( + String operation, + Exception cause) { + super(operation); + initCause(cause); + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 2ed9083efcddd..3d2ecc77376bf 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -99,6 +99,14 @@ public class S3AInputStream extends FSInputStream implements CanSetReadahead, public static final String OPERATION_OPEN = "open"; public static final String OPERATION_REOPEN = "re-open"; + /** + * Switch for behavior on when wrappedStream.read() + * returns -1 or raises an EOF; the original semantics + * are that the stream is kept open. + * Value {@value}. 
+ */ + private static final boolean CLOSE_WRAPPED_STREAM_ON_NEGATIVE_READ = true; + /** * This is the maximum temporary buffer size we use while * populating the data in direct byte buffers during a vectored IO @@ -333,7 +341,7 @@ private void seekQuietly(long positiveTargetPos) { @Retries.OnceTranslated private void seekInStream(long targetPos, long length) throws IOException { checkNotClosed(); - if (wrappedStream == null) { + if (!isObjectStreamOpen()) { return; } // compute how much more to skip @@ -406,22 +414,29 @@ public boolean seekToNewSource(long targetPos) throws IOException { /** * Perform lazy seek and adjust stream to correct position for reading. - * + * If an EOF Exception is raised there are two possibilities + *
<ol>
+ *   <li>the stream is at the end of the file</li>
+ *   <li>something went wrong with the network connection</li>
+ * </ol>
+ * This method does not attempt to distinguish; it assumes that an EOF + * exception is always "end of file". * @param targetPos position from where data should be read * @param len length of the content that needs to be read + * @throws RangeNotSatisfiableEOFException GET is out of range + * @throws IOException anything else. */ @Retries.RetryTranslated private void lazySeek(long targetPos, long len) throws IOException { Invoker invoker = context.getReadInvoker(); - invoker.maybeRetry(streamStatistics.getOpenOperations() == 0, - "lazySeek", pathStr, true, + invoker.retry("lazySeek to " + targetPos, pathStr, true, () -> { //For lazy seek seekInStream(targetPos, len); //re-open at specific location if needed - if (wrappedStream == null) { + if (!isObjectStreamOpen()) { reopen("read from new offset", targetPos, len, false); } }); @@ -449,7 +464,9 @@ public synchronized int read() throws IOException { try { lazySeek(nextReadPos, 1); - } catch (EOFException e) { + } catch (RangeNotSatisfiableEOFException e) { + // attempt to GET beyond the end of the object + LOG.debug("Downgrading 416 response attempt to read at {} to -1 response", nextReadPos); return -1; } @@ -460,14 +477,12 @@ public synchronized int read() throws IOException { // When exception happens before re-setting wrappedStream in "reopen" called // by onReadFailure, then wrappedStream will be null. But the **retry** may // re-execute this block and cause NPE if we don't check wrappedStream - if (wrappedStream == null) { + if (!isObjectStreamOpen()) { reopen("failure recovery", getPos(), 1, false); } try { b = wrappedStream.read(); - } catch (EOFException e) { - return -1; - } catch (SocketTimeoutException e) { + } catch (HttpChannelEOFException | SocketTimeoutException e) { onReadFailure(e, true); throw e; } catch (IOException e) { @@ -480,10 +495,9 @@ public synchronized int read() throws IOException { if (byteRead >= 0) { pos++; nextReadPos++; - } - - if (byteRead >= 0) { incrementBytesRead(1); + } else { + streamReadResultNegative(); } return byteRead; } @@ -509,6 +523,18 @@ private void onReadFailure(IOException ioe, boolean forceAbort) { closeStream("failure recovery", forceAbort, false); } + /** + * the read() call returned -1. + * this means "the connection has gone past the end of the object" or + * the stream has broken for some reason. + * so close stream (without an abort). + */ + private void streamReadResultNegative() { + if (CLOSE_WRAPPED_STREAM_ON_NEGATIVE_READ) { + closeStream("wrappedStream.read() returned -1", false, false); + } + } + /** * {@inheritDoc} * @@ -534,8 +560,8 @@ public synchronized int read(byte[] buf, int off, int len) try { lazySeek(nextReadPos, len); - } catch (EOFException e) { - // the end of the file has moved + } catch (RangeNotSatisfiableEOFException e) { + // attempt to GET beyond the end of the object return -1; } @@ -548,17 +574,19 @@ public synchronized int read(byte[] buf, int off, int len) // When exception happens before re-setting wrappedStream in "reopen" called // by onReadFailure, then wrappedStream will be null. But the **retry** may // re-execute this block and cause NPE if we don't check wrappedStream - if (wrappedStream == null) { + if (!isObjectStreamOpen()) { reopen("failure recovery", getPos(), 1, false); } try { + // read data; will block until there is data or the end of the stream is reached. + // returns 0 for "stream is open but no data yet" and -1 for "end of stream". 
bytes = wrappedStream.read(buf, off, len); - } catch (EOFException e) { - // the base implementation swallows EOFs. - return -1; - } catch (SocketTimeoutException e) { + } catch (HttpChannelEOFException | SocketTimeoutException e) { onReadFailure(e, true); throw e; + } catch (EOFException e) { + LOG.debug("EOFException raised by http stream read(); downgrading to a -1 response", e); + return -1; } catch (IOException e) { onReadFailure(e, false); throw e; @@ -569,8 +597,10 @@ public synchronized int read(byte[] buf, int off, int len) if (bytesRead > 0) { pos += bytesRead; nextReadPos += bytesRead; + incrementBytesRead(bytesRead); + } else { + streamReadResultNegative(); } - incrementBytesRead(bytesRead); streamStatistics.readOperationCompleted(len, bytesRead); return bytesRead; } @@ -818,6 +848,9 @@ public void readFully(long position, byte[] buffer, int offset, int length) while (nread < length) { int nbytes = read(buffer, offset + nread, length - nread); if (nbytes < 0) { + // no attempt is currently made to recover from stream read problems; + // a lazy seek to the offset is probably the solution. + // but it will need more qualification against failure handling throw new EOFException(FSExceptionMessages.EOF_IN_READ_FULLY); } nread += nbytes; @@ -987,7 +1020,7 @@ private void validateRangeRequest(FileRange range) throws EOFException { final String errMsg = String.format("Requested range [%d, %d) is beyond EOF for path %s", range.getOffset(), range.getLength(), pathStr); LOG.warn(errMsg); - throw new EOFException(errMsg); + throw new RangeNotSatisfiableEOFException(errMsg, null); } } @@ -1257,8 +1290,12 @@ public boolean hasCapability(String capability) { } } + /** + * Is the inner object stream open? + * @return true if there is an active HTTP request to S3. + */ @VisibleForTesting - boolean isObjectStreamOpen() { + public boolean isObjectStreamOpen() { return wrappedStream != null; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java index 9438ac22bdb19..faf105c8e2c86 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ARetryPolicy.java @@ -209,9 +209,15 @@ protected Map, RetryPolicy> createExceptionMap() { // in this map. policyMap.put(AWSClientIOException.class, retryAwsClientExceptions); + // Http Channel issues: treat as communication failure + policyMap.put(HttpChannelEOFException.class, connectivityFailure); + // server didn't respond. policyMap.put(AWSNoResponseException.class, retryIdempotentCalls); + // range header is out of scope of object; retrying won't help + policyMap.put(RangeNotSatisfiableEOFException.class, fail); + // should really be handled by resubmitting to new location; // that's beyond the scope of this retry policy policyMap.put(AWSRedirectException.class, fail); @@ -251,10 +257,7 @@ protected Map, RetryPolicy> createExceptionMap() { policyMap.put(ConnectException.class, connectivityFailure); // this can be a sign of an HTTP connection breaking early. - // which can be reacted to by another attempt if the request was idempotent. - // But: could also be a sign of trying to read past the EOF on a GET, - // which isn't going to be recovered from - policyMap.put(EOFException.class, retryIdempotentCalls); + policyMap.put(EOFException.class, connectivityFailure); // object not found. 
404 when not unknown bucket; 410 "gone" policyMap.put(FileNotFoundException.class, fail); @@ -300,7 +303,7 @@ public RetryAction shouldRetry(Exception exception, if (exception instanceof SdkException) { // update the sdk exception for the purpose of exception // processing. - ex = S3AUtils.translateException("", "", (SdkException) exception); + ex = S3AUtils.translateException("", "/", (SdkException) exception); } LOG.debug("Retry probe for {} with {} retries and {} failovers," + " idempotent={}, due to {}", diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 6ef0cd8dc9938..6a719739e720e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -167,13 +167,20 @@ public static IOException translateException(String operation, */ @SuppressWarnings("ThrowableInstanceNeverThrown") public static IOException translateException(@Nullable String operation, - String path, + @Nullable String path, SdkException exception) { String message = String.format("%s%s: %s", operation, StringUtils.isNotEmpty(path)? (" on " + path) : "", exception); + if (path == null || path.isEmpty()) { + // handle null path by giving it a stub value. + // not ideal/informative, but ensures that the path is never null in + // exceptions constructed. + path = "/"; + } + if (!(exception instanceof AwsServiceException)) { // exceptions raised client-side: connectivity, auth, network problems... Exception innerCause = containsInterruptedException(exception); @@ -196,7 +203,7 @@ public static IOException translateException(@Nullable String operation, return ioe; } // network problems covered by an IOE inside the exception chain. - ioe = maybeExtractIOException(path, exception); + ioe = maybeExtractIOException(path, exception, message); if (ioe != null) { return ioe; } @@ -300,10 +307,13 @@ public static IOException translateException(@Nullable String operation, break; // out of range. This may happen if an object is overwritten with - // a shorter one while it is being read. + // a shorter one while it is being read or openFile() was invoked + // passing a FileStatus or file length less than that of the object. + // although the HTTP specification says that the response should + // include a range header specifying the actual range available, + // this isn't picked up here. case SC_416_RANGE_NOT_SATISFIABLE: - ioe = new EOFException(message); - ioe.initCause(ase); + ioe = new RangeNotSatisfiableEOFException(message, ase); break; // this has surfaced as a "no response from server" message. 
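To make the 416 mapping in the hunk above concrete, here is a small editorial sketch (not part of the patch; the class name, path and the "InvalidRange" error code string are illustrative only) showing that translateException() now yields the dedicated EOFException subclass for an out-of-range GET:

import java.io.EOFException;
import java.io.IOException;

import software.amazon.awssdk.awscore.exception.AwsErrorDetails;
import software.amazon.awssdk.awscore.exception.AwsServiceException;

import org.apache.hadoop.fs.s3a.RangeNotSatisfiableEOFException;
import org.apache.hadoop.fs.s3a.S3AUtils;

public final class TranslateRangeErrorSketch {
  private TranslateRangeErrorSketch() {
  }

  public static void main(String[] args) {
    // a service exception carrying HTTP status 416, as raised on an out-of-range GET
    AwsServiceException ase = AwsServiceException.builder()
        .message("Requested Range Not Satisfiable")
        .statusCode(416)
        .awsErrorDetails(AwsErrorDetails.builder().errorCode("InvalidRange").build())
        .build();

    // translated into the new subclass: existing catch (EOFException) blocks
    // still match, while the retry policy can single it out and fail fast
    IOException ioe = S3AUtils.translateException("read", "/example/key", ase);
    System.out.println("range error: " + (ioe instanceof RangeNotSatisfiableEOFException));
    System.out.println("still an EOFException: " + (ioe instanceof EOFException));
  }
}
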
@@ -673,7 +683,7 @@ public static InstanceT getInstanceFromReflection(String className, if (targetException instanceof IOException) { throw (IOException) targetException; } else if (targetException instanceof SdkException) { - throw translateException("Instantiate " + className, "", (SdkException) targetException); + throw translateException("Instantiate " + className, "/", (SdkException) targetException); } else { // supported constructor or factory method found, but the call failed throw instantiationException(uri, className, configKey, targetException); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java index 3cb8d97532448..3df862055d197 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java @@ -294,6 +294,11 @@ private static long toSafeLong(final Number size) { private static final String BYTES_PREFIX = "bytes="; + /** + * Given a range header, determine the size of the request. + * @param rangeHeader header string + * @return parsed size or -1 for problems + */ private static Number sizeFromRangeHeader(String rangeHeader) { if (rangeHeader != null && rangeHeader.startsWith(BYTES_PREFIX)) { String[] values = rangeHeader @@ -302,7 +307,7 @@ private static Number sizeFromRangeHeader(String rangeHeader) { if (values.length == 2) { try { long start = Long.parseUnsignedLong(values[0]); - long end = Long.parseUnsignedLong(values[0]); + long end = Long.parseUnsignedLong(values[1]); return end - start; } catch(NumberFormatException e) { } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java index 080b79e7f20d5..b9a7c776b1405 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java @@ -101,7 +101,8 @@ public AwsCredentials resolveCredentials() { // if the exception contains an IOE, extract it // so its type is the immediate cause of this new exception. 
Throwable t = e; - final IOException ioe = maybeExtractIOException("IAM endpoint", e); + final IOException ioe = maybeExtractIOException("IAM endpoint", e, + "resolveCredentials()"); if (ioe != null) { t = ioe; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java index f8a1f907bb3b1..7934a5c7d4d5c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ErrorTranslation.java @@ -23,8 +23,11 @@ import software.amazon.awssdk.awscore.exception.AwsServiceException; +import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.s3a.HttpChannelEOFException; import org.apache.hadoop.fs.PathIOException; +import static org.apache.commons.lang3.StringUtils.isNotEmpty; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_404_NOT_FOUND; /** @@ -42,6 +45,24 @@ */ public final class ErrorTranslation { + /** + * OpenSSL stream closed error: {@value}. + * See HADOOP-19027. + */ + public static final String OPENSSL_STREAM_CLOSED = "WFOPENSSL0035"; + + /** + * Classname of unshaded Http Client exception: {@value}. + */ + private static final String RAW_NO_HTTP_RESPONSE_EXCEPTION = + "org.apache.http.NoHttpResponseException"; + + /** + * Classname of shaded Http Client exception: {@value}. + */ + private static final String SHADED_NO_HTTP_RESPONSE_EXCEPTION = + "software.amazon.awssdk.thirdparty.org.apache.http.NoHttpResponseException"; + /** * Private constructor for utility class. */ @@ -71,25 +92,51 @@ public static boolean isObjectNotFound(AwsServiceException e) { return e.statusCode() == SC_404_NOT_FOUND && !isUnknownBucket(e); } + /** + * Tail recursive extraction of the innermost throwable. + * @param thrown next thrown in chain. + * @param outer outermost. + * @return the last non-null throwable in the chain. + */ + private static Throwable getInnermostThrowable(Throwable thrown, Throwable outer) { + if (thrown == null) { + return outer; + } + return getInnermostThrowable(thrown.getCause(), thrown); + } + /** * Translate an exception if it or its inner exception is an * IOException. - * If this condition is not met, null is returned. + * This also contains the logic to extract an AWS HTTP channel exception, + * which may or may not be an IOE, depending on the underlying SSL implementation + * in use. + * If an IOException cannot be extracted, null is returned. * @param path path of operation. * @param thrown exception + * @param message message generated by the caller. * @return a translated exception or null. */ - public static IOException maybeExtractIOException(String path, Throwable thrown) { + public static IOException maybeExtractIOException( + String path, + Throwable thrown, + String message) { if (thrown == null) { return null; } - // look inside - Throwable cause = thrown.getCause(); - while (cause != null && cause.getCause() != null) { - cause = cause.getCause(); + // walk down the chain of exceptions to find the innermost. + Throwable cause = getInnermostThrowable(thrown.getCause(), thrown); + + // see if this is an http channel exception + HttpChannelEOFException channelException = + maybeExtractChannelException(path, message, cause); + if (channelException != null) { + return channelException; } + + // not a channel exception, not an IOE. 
if (!(cause instanceof IOException)) { return null; } @@ -102,8 +149,7 @@ public static IOException maybeExtractIOException(String path, Throwable thrown) // unless no suitable constructor is available. final IOException ioe = (IOException) cause; - return wrapWithInnerIOE(path, thrown, ioe); - + return wrapWithInnerIOE(path, message, thrown, ioe); } /** @@ -116,6 +162,7 @@ public static IOException maybeExtractIOException(String path, Throwable thrown) * See {@code NetUtils}. * @param type of inner exception. * @param path path of the failure. + * @param message message generated by the caller. * @param outer outermost exception. * @param inner inner exception. * @return the new exception. @@ -123,9 +170,12 @@ public static IOException maybeExtractIOException(String path, Throwable thrown) @SuppressWarnings("unchecked") private static IOException wrapWithInnerIOE( String path, + String message, Throwable outer, T inner) { - String msg = outer.toString() + ": " + inner.getMessage(); + String msg = (isNotEmpty(message) ? (message + ":" + + " ") : "") + + outer.toString() + ": " + inner.getMessage(); Class clazz = inner.getClass(); try { Constructor ctor = clazz.getConstructor(String.class); @@ -136,6 +186,35 @@ private static IOException wrapWithInnerIOE( } } + /** + * Extract an AWS HTTP channel exception if the inner exception is considered + * an HttpClient {@code NoHttpResponseException} or an OpenSSL channel exception. + * This is based on string matching, which is inelegant and brittle. + * @param path path of the failure. + * @param message message generated by the caller. + * @param thrown inner exception. + * @return the new exception. + */ + @VisibleForTesting + public static HttpChannelEOFException maybeExtractChannelException( + String path, + String message, + Throwable thrown) { + final String classname = thrown.getClass().getName(); + if (thrown instanceof IOException + && (classname.equals(RAW_NO_HTTP_RESPONSE_EXCEPTION) + || classname.equals(SHADED_NO_HTTP_RESPONSE_EXCEPTION))) { + // shaded or unshaded http client exception class + return new HttpChannelEOFException(path, message, thrown); + } + // there's ambiguity about what exception class this is + // so rather than use its type, we look for an OpenSSL string in the message + if (thrown.getMessage().contains(OPENSSL_STREAM_CLOSED)) { + return new HttpChannelEOFException(path, message, thrown); + } + return null; + } + /** * AWS error codes explicitly recognized and processes specially; * kept in their own class for isolation. 
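Rounding off the ErrorTranslation changes, a brief editorial probe (a sketch, not part of the patch; the class name and the path/message strings are invented, and it mirrors the unit tests added later in this patch) shows the string-matching extraction and the retry classification working together:

import org.apache.http.NoHttpResponseException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.s3a.HttpChannelEOFException;
import org.apache.hadoop.fs.s3a.S3ARetryPolicy;
import org.apache.hadoop.fs.s3a.impl.ErrorTranslation;
import org.apache.hadoop.io.retry.RetryPolicy;

public final class ChannelErrorRetrySketch {
  private ChannelErrorRetrySketch() {
  }

  public static void main(String[] args) throws Exception {
    // the unshaded http client exception is recognised by class name and wrapped
    HttpChannelEOFException channelError =
        ErrorTranslation.maybeExtractChannelException(
            "/example/key",
            "GET /example/key",
            new NoHttpResponseException("server failed to respond"));

    // probe the policy the same way the tests do: channel errors are classed
    // as connectivity failures, so the expected decision is RETRY
    S3ARetryPolicy policy = new S3ARetryPolicy(new Configuration(false));
    RetryPolicy.RetryAction action = policy.shouldRetry(channelError, 0, 0, true);
    System.out.println("retry decision: " + action.action);
  }
}
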
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java index 4c357e288c84f..9966393d41fdb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/contract/s3a/ITestS3AContractVectoredRead.java @@ -21,9 +21,12 @@ import java.io.EOFException; import java.io.IOException; import java.io.InterruptedIOException; +import java.nio.ByteBuffer; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.concurrent.CompletableFuture; +import java.util.concurrent.TimeUnit; import org.junit.Test; import org.slf4j.Logger; @@ -36,7 +39,9 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.contract.AbstractContractVectoredReadTest; import org.apache.hadoop.fs.contract.AbstractFSContract; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.Constants; +import org.apache.hadoop.fs.s3a.RangeNotSatisfiableEOFException; import org.apache.hadoop.fs.s3a.S3AFileSystem; import org.apache.hadoop.fs.s3a.S3ATestUtils; import org.apache.hadoop.fs.statistics.IOStatistics; @@ -44,10 +49,13 @@ import org.apache.hadoop.fs.statistics.StreamStatisticNames; import org.apache.hadoop.test.LambdaTestUtils; +import static org.apache.hadoop.fs.FSExceptionMessages.EOF_IN_READ_FULLY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; import static org.apache.hadoop.fs.contract.ContractTestUtils.returnBuffersToPoolPostRead; import static org.apache.hadoop.fs.contract.ContractTestUtils.validateVectoredReadResult; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.verifyStatisticCounterValue; import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; +import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; import static org.apache.hadoop.test.MoreAsserts.assertEqual; public class ITestS3AContractVectoredRead extends AbstractContractVectoredReadTest { @@ -72,7 +80,54 @@ public void testEOFRanges() throws Exception { FileSystem fs = getFileSystem(); List fileRanges = new ArrayList<>(); fileRanges.add(FileRange.createFileRange(DATASET_LEN, 100)); - verifyExceptionalVectoredRead(fs, fileRanges, EOFException.class); + verifyExceptionalVectoredRead(fs, fileRanges, RangeNotSatisfiableEOFException.class); + } + + /** + * Verify response to a vector read request which is beyond the + * real length of the file. + * Unlike the {@link #testEOFRanges()} test, the input stream in + * this test thinks the file is longer than it is, so the call + * fails in the GET request. 
+ */ + @Test + public void testEOFRanges416Handling() throws Exception { + FileSystem fs = getFileSystem(); + + final int extendedLen = DATASET_LEN + 1024; + CompletableFuture builder = + fs.openFile(path(VECTORED_READ_FILE_NAME)) + .mustLong(FS_OPTION_OPENFILE_LENGTH, extendedLen) + .build(); + List fileRanges = new ArrayList<>(); + fileRanges.add(FileRange.createFileRange(DATASET_LEN, 100)); + + describe("Read starting from past EOF"); + try (FSDataInputStream in = builder.get()) { + in.readVectored(fileRanges, getAllocate()); + FileRange res = fileRanges.get(0); + CompletableFuture data = res.getData(); + interceptFuture(RangeNotSatisfiableEOFException.class, + "416", + ContractTestUtils.VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS, + data); + } + + describe("Read starting 0 continuing past EOF"); + try (FSDataInputStream in = fs.openFile(path(VECTORED_READ_FILE_NAME)) + .mustLong(FS_OPTION_OPENFILE_LENGTH, extendedLen) + .build().get()) { + final FileRange range = FileRange.createFileRange(0, extendedLen); + in.readVectored(Arrays.asList(range), getAllocate()); + CompletableFuture data = range.getData(); + interceptFuture(EOFException.class, + EOF_IN_READ_FULLY, + ContractTestUtils.VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS, + data); + } + } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 0d4cf6a2962d8..6dc3ca11028a6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -19,6 +19,7 @@ package org.apache.hadoop.fs.s3a; import org.apache.commons.lang3.StringUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; @@ -72,6 +73,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.exception.SdkClientException; import java.io.Closeable; import java.io.File; @@ -456,6 +458,8 @@ public static E verifyExceptionClass(Class clazz, .describedAs("Exception expected of class %s", clazz) .isNotNull(); if (!(ex.getClass().equals(clazz))) { + LOG.warn("Rethrowing exception: {} as it is not an instance of {}", + ex, clazz, ex); throw ex; } return (E)ex; @@ -1711,4 +1715,59 @@ public static String etag(FileStatus status) { "Not an EtagSource: %s", status); return ((EtagSource) status).getEtag(); } + + /** + * Create an SDK client exception. + * @param message message + * @param cause nullable cause + * @return the exception + */ + public static SdkClientException sdkClientException( + String message, Throwable cause) { + return SdkClientException.builder() + .message(message) + .cause(cause) + .build(); + } + + /** + * Create an SDK client exception using the string value of the cause + * as the message. + * @param cause nullable cause + * @return the exception + */ + public static SdkClientException sdkClientException( + Throwable cause) { + return SdkClientException.builder() + .message(cause.toString()) + .cause(cause) + .build(); + } + + private static final String BYTES_PREFIX = "bytes="; + + /** + * Given a range header, split into start and end. + * Based on AWSRequestAnalyzer. 
+ * @param rangeHeader header string + * @return parse range, or (-1, -1) for problems + */ + public static Pair requestRange(String rangeHeader) { + if (rangeHeader != null && rangeHeader.startsWith(BYTES_PREFIX)) { + String[] values = rangeHeader + .substring(BYTES_PREFIX.length()) + .split("-"); + if (values.length == 2) { + try { + long start = Long.parseUnsignedLong(values[0]); + long end = Long.parseUnsignedLong(values[1]); + return Pair.of(start, end); + } catch (NumberFormatException e) { + LOG.warn("Failed to parse range header {}", rangeHeader, e); + } + } + } + // error case + return Pair.of(-1L, -1L); + } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java index b26ca6889bd1b..6b894a6813704 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AExceptionTranslation.java @@ -20,9 +20,11 @@ import static org.apache.hadoop.fs.s3a.AWSCredentialProviderList.maybeTranslateCredentialException; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.sdkClientException; import static org.apache.hadoop.fs.s3a.S3ATestUtils.verifyExceptionClass; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.audit.AuditIntegration.maybeTranslateAuditException; +import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.maybeExtractChannelException; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*; import static org.apache.hadoop.test.LambdaTestUtils.verifyCause; import static org.junit.Assert.*; @@ -36,11 +38,11 @@ import java.util.function.Consumer; import org.assertj.core.api.Assertions; +import org.junit.Before; import software.amazon.awssdk.awscore.exception.AwsErrorDetails; import software.amazon.awssdk.awscore.exception.AwsServiceException; import software.amazon.awssdk.core.exception.ApiCallAttemptTimeoutException; import software.amazon.awssdk.core.exception.ApiCallTimeoutException; -import software.amazon.awssdk.core.exception.SdkClientException; import software.amazon.awssdk.core.exception.SdkException; import software.amazon.awssdk.http.SdkHttpResponse; import software.amazon.awssdk.services.s3.model.S3Exception; @@ -53,15 +55,32 @@ import org.apache.hadoop.fs.s3a.audit.AuditOperationRejectedException; import org.apache.hadoop.fs.s3a.impl.ErrorTranslation; import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.test.AbstractHadoopTestBase; +import org.apache.http.NoHttpResponseException; import static org.apache.hadoop.test.GenericTestUtils.assertExceptionContains; /** - * Unit test suite covering translation of AWS SDK exceptions to S3A exceptions, + * Unit test suite covering translation of AWS/network exceptions to S3A exceptions, * and retry/recovery policies. */ @SuppressWarnings("ThrowableNotThrown") -public class TestS3AExceptionTranslation { +public class TestS3AExceptionTranslation extends AbstractHadoopTestBase { + + public static final String WFOPENSSL_0035_STREAM_IS_CLOSED = + "Unable to execute HTTP request: " + + ErrorTranslation.OPENSSL_STREAM_CLOSED + + " Stream is closed"; + + /** + * Retry policy to use in tests. 
+ */ + private S3ARetryPolicy retryPolicy; + + @Before + public void setup() { + retryPolicy = new S3ARetryPolicy(new Configuration(false)); + } @Test public void test301ContainsRegion() throws Exception { @@ -91,10 +110,10 @@ protected void assertContained(String text, String contained) { text != null && text.contains(contained)); } - protected void verifyTranslated( + protected E verifyTranslated( int status, Class expected) throws Exception { - verifyTranslated(expected, createS3Exception(status)); + return verifyTranslated(expected, createS3Exception(status)); } @Test @@ -142,7 +161,12 @@ public void test410isNotFound() throws Exception { @Test public void test416isEOF() throws Exception { - verifyTranslated(SC_416_RANGE_NOT_SATISFIABLE, EOFException.class); + + // 416 maps the the subclass of EOFException + final IOException ex = verifyTranslated(SC_416_RANGE_NOT_SATISFIABLE, + RangeNotSatisfiableEOFException.class); + Assertions.assertThat(ex) + .isInstanceOf(EOFException.class); } @Test @@ -254,12 +278,6 @@ public void testExtractInterruptedIO() throws Throwable { .build())); } - private SdkClientException sdkClientException(String message, Throwable cause) { - return SdkClientException.builder() - .message(message) - .cause(cause) - .build(); - } @Test public void testTranslateCredentialException() throws Throwable { verifyExceptionClass(AccessDeniedException.class, @@ -375,10 +393,89 @@ public void testApiCallAttemptTimeoutExceptionToTimeout() throws Throwable { verifyCause(ApiCallAttemptTimeoutException.class, ex); // and confirm these timeouts are retried. - final S3ARetryPolicy retryPolicy = new S3ARetryPolicy(new Configuration(false)); + assertRetried(ex); + } + + @Test + public void testChannelExtraction() throws Throwable { + verifyExceptionClass(HttpChannelEOFException.class, + maybeExtractChannelException("", "/", + new NoHttpResponseException("no response"))); + } + + @Test + public void testShadedChannelExtraction() throws Throwable { + verifyExceptionClass(HttpChannelEOFException.class, + maybeExtractChannelException("", "/", + shadedNoHttpResponse())); + } + + @Test + public void testOpenSSLErrorChannelExtraction() throws Throwable { + verifyExceptionClass(HttpChannelEOFException.class, + maybeExtractChannelException("", "/", + sdkClientException(WFOPENSSL_0035_STREAM_IS_CLOSED, null))); + } + + /** + * Test handling of the unshaded HTTP client exception. + */ + @Test + public void testRawNoHttpResponseExceptionRetry() throws Throwable { + assertRetried( + verifyExceptionClass(HttpChannelEOFException.class, + translateException("test", "/", + sdkClientException(new NoHttpResponseException("no response"))))); + } + + /** + * Test handling of the shaded HTTP client exception. + */ + @Test + public void testShadedNoHttpResponseExceptionRetry() throws Throwable { + assertRetried( + verifyExceptionClass(HttpChannelEOFException.class, + translateException("test", "/", + sdkClientException(shadedNoHttpResponse())))); + } + + @Test + public void testOpenSSLErrorRetry() throws Throwable { + assertRetried( + verifyExceptionClass(HttpChannelEOFException.class, + translateException("test", "/", + sdkClientException(WFOPENSSL_0035_STREAM_IS_CLOSED, null)))); + } + + /** + * Create a shaded NoHttpResponseException. + * @return an exception. + */ + private static Exception shadedNoHttpResponse() { + return new software.amazon.awssdk.thirdparty.org.apache.http.NoHttpResponseException("shaded"); + } + + /** + * Assert that an exception is retried. 
+ * @param ex exception + * @throws Exception failure during retry policy evaluation. + */ + private void assertRetried(final Exception ex) throws Exception { + assertRetryOutcome(ex, RetryPolicy.RetryAction.RetryDecision.RETRY); + } + + /** + * Assert that the retry policy is as expected for a given exception. + * @param ex exception + * @param decision expected decision + * @throws Exception failure during retry policy evaluation. + */ + private void assertRetryOutcome( + final Exception ex, + final RetryPolicy.RetryAction.RetryDecision decision) throws Exception { Assertions.assertThat(retryPolicy.shouldRetry(ex, 0, 0, true).action) .describedAs("retry policy for exception %s", ex) - .isEqualTo(RetryPolicy.RetryAction.RetryDecision.RETRY); + .isEqualTo(decision); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java index da1284343da9f..6eccdc23dd5d5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AInputStreamRetry.java @@ -24,7 +24,9 @@ import java.net.SocketException; import java.nio.charset.StandardCharsets; import java.util.concurrent.CompletableFuture; +import java.util.function.Function; +import org.assertj.core.api.Assertions; import software.amazon.awssdk.awscore.exception.AwsErrorDetails; import software.amazon.awssdk.awscore.exception.AwsServiceException; import software.amazon.awssdk.core.ResponseInputStream; @@ -34,41 +36,57 @@ import org.junit.Test; import org.apache.commons.io.IOUtils; +import org.apache.commons.lang3.tuple.Pair; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.audit.impl.NoopSpan; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.util.functional.CallableRaisingIOE; +import org.apache.http.NoHttpResponseException; - -import static java.lang.Math.min; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.requestRange; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.sdkClientException; +import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_416_RANGE_NOT_SATISFIABLE; import static org.apache.hadoop.util.functional.FutureIO.eval; import static org.junit.Assert.assertArrayEquals; import static org.junit.Assert.assertEquals; /** * Tests S3AInputStream retry behavior on read failure. + *
<p>
 * These tests are for validating expected behavior of retrying the * S3AInputStream read() and read(b, off, len), it tests that the read should * reopen the input stream and retry the read when IOException is thrown * during the read process. + *
<p>
+ * This includes handling of out of range requests. */ public class TestS3AInputStreamRetry extends AbstractS3AMockTest { - private static final String INPUT = "ab"; + /** + * Test input stream content: charAt(x) == hex value of x. + */ + private static final String INPUT = "012345678ABCDEF"; + + /** + * Status code to raise by default. + */ + public static final int STATUS = 0; @Test public void testInputStreamReadRetryForException() throws IOException { - S3AInputStream s3AInputStream = getMockedS3AInputStream(); - assertEquals("'a' from the test input stream 'ab' should be the first " + + S3AInputStream s3AInputStream = getMockedS3AInputStream(failingInputStreamCallbacks( + awsServiceException(STATUS))); + assertEquals("'0' from the test input stream should be the first " + "character being read", INPUT.charAt(0), s3AInputStream.read()); - assertEquals("'b' from the test input stream 'ab' should be the second " + + assertEquals("'1' from the test input stream should be the second " + "character being read", INPUT.charAt(1), s3AInputStream.read()); } @Test public void testInputStreamReadLengthRetryForException() throws IOException { byte[] result = new byte[INPUT.length()]; - S3AInputStream s3AInputStream = getMockedS3AInputStream(); + S3AInputStream s3AInputStream = getMockedS3AInputStream( + failingInputStreamCallbacks(awsServiceException(STATUS))); s3AInputStream.read(result, 0, INPUT.length()); assertArrayEquals( @@ -79,7 +97,8 @@ public void testInputStreamReadLengthRetryForException() throws IOException { @Test public void testInputStreamReadFullyRetryForException() throws IOException { byte[] result = new byte[INPUT.length()]; - S3AInputStream s3AInputStream = getMockedS3AInputStream(); + S3AInputStream s3AInputStream = getMockedS3AInputStream(failingInputStreamCallbacks( + awsServiceException(STATUS))); s3AInputStream.readFully(0, result); assertArrayEquals( @@ -87,7 +106,65 @@ public void testInputStreamReadFullyRetryForException() throws IOException { INPUT.getBytes(), result); } - private S3AInputStream getMockedS3AInputStream() { + /** + * Seek and read repeatedly with every second GET failing with {@link NoHttpResponseException}. + * This should be effective in simulating {@code reopen()} failures caused by network problems. + */ + @Test + public void testReadMultipleSeeksNoHttpResponse() throws Throwable { + final RuntimeException ex = sdkClientException(new NoHttpResponseException("no response")); + // fail on even reads + S3AInputStream stream = getMockedS3AInputStream( + maybeFailInGetCallback(ex, (index) -> (index % 2 == 0))); + // 10 reads with repeated failures. + for (int i = 0; i < 10; i++) { + stream.seek(0); + final int r = stream.read(); + assertReadValueMatchesOffset(r, 0, "read attempt " + i + " of " + stream); + } + } + + /** + * Seek and read repeatedly with every second GET failing with {@link NoHttpResponseException}. + * This should be effective in simulating {@code reopen()} failures caused by network problems. + */ + @Test + public void testReadMultipleSeeksStreamClosed() throws Throwable { + final RuntimeException ex = sdkClientException(new NoHttpResponseException("no response")); + // fail on even reads + S3AInputStream stream = getMockedS3AInputStream( + maybeFailInGetCallback(ex, (index) -> (index % 2 == 0))); + // 10 reads with repeated failures. 
+ for (int i = 0; i < 10; i++) { + stream.seek(0); + final int r = stream.read(); + assertReadValueMatchesOffset(r, 0, "read attempt " + i + " of " + stream); + } + } + + /** + * Assert that the result of read() matches the char at the expected offset. + * @param r read result + * @param pos pos in stream + * @param text text for error string. + */ + private static void assertReadValueMatchesOffset( + final int r, final int pos, final String text) { + Assertions.assertThat(r) + .describedAs("read() at %d of %s", pos, text) + .isGreaterThan(-1); + Assertions.assertThat(Character.toString((char) r)) + .describedAs("read() at %d of %s", pos, text) + .isEqualTo(String.valueOf(INPUT.charAt(pos))); + } + + /** + * Create a mocked input stream for a given callback. + * @param streamCallback callback to use on GET calls + * @return a stream. + */ + private S3AInputStream getMockedS3AInputStream( + S3AInputStream.InputStreamCallbacks streamCallback) { Path path = new Path("test-path"); String eTag = "test-etag"; String versionId = "test-version-id"; @@ -113,55 +190,108 @@ private S3AInputStream getMockedS3AInputStream() { return new S3AInputStream( s3AReadOpContext, s3ObjectAttributes, - getMockedInputStreamCallback(), + streamCallback, s3AReadOpContext.getS3AStatisticsContext().newInputStreamStatistics(), null); } /** - * Get mocked InputStreamCallbacks where we return mocked S3Object. - * + * Create mocked InputStreamCallbacks which returns a mocked S3Object and fails on + * the third invocation. + * This is the original mock stream used in this test suite; the failure logic and stream + * selection has been factored out to support different failure modes. + * @param ex exception to raise on failure * @return mocked object. */ - private S3AInputStream.InputStreamCallbacks getMockedInputStreamCallback() { + private S3AInputStream.InputStreamCallbacks failingInputStreamCallbacks( + final RuntimeException ex) { + GetObjectResponse objectResponse = GetObjectResponse.builder() .eTag("test-etag") .build(); - ResponseInputStream[] responseInputStreams = - new ResponseInputStream[] { - getMockedInputStream(objectResponse, true), - getMockedInputStream(objectResponse, true), - getMockedInputStream(objectResponse, false) - }; + final SSLException ioe = new SSLException(new SocketException("Connection reset")); + + // open() -> lazySeek() -> reopen() + // -> getObject (mockedS3ObjectIndex=1) -> getObjectContent(objectInputStreamBad1) + // read() -> objectInputStreamBad1 throws exception + // -> onReadFailure -> close wrappedStream + // -> retry(1) -> wrappedStream==null -> reopen -> getObject (mockedS3ObjectIndex=2) + // -> getObjectContent(objectInputStreamBad2)-> objectInputStreamBad2 + // -> wrappedStream.read -> objectInputStreamBad2 throws exception + // -> onReadFailure -> close wrappedStream + // -> retry(2) -> wrappedStream==null -> reopen + // -> getObject (mockedS3ObjectIndex=3) throws exception + // -> retry(3) -> wrappedStream==null -> reopen -> getObject (mockedS3ObjectIndex=4) + // -> getObjectContent(objectInputStreamGood)-> objectInputStreamGood + // -> wrappedStream.read + + return mockInputStreamCallback(ex, + attempt -> 3 == attempt, + attempt -> mockedInputStream(objectResponse, attempt < 3, ioe)); + } + + /** + * Create mocked InputStreamCallbacks which returns a mocked S3Object and fails + * when the the predicate indicates that it should. + * The stream response itself does not fail. + * @param ex exception to raise on failure + * @return mocked object. 
+ */ + private S3AInputStream.InputStreamCallbacks maybeFailInGetCallback( + final RuntimeException ex, + final Function failurePredicate) { + GetObjectResponse objectResponse = GetObjectResponse.builder() + .eTag("test-etag") + .build(); + + return mockInputStreamCallback(ex, + failurePredicate, + attempt -> mockedInputStream(objectResponse, false, null)); + } + + /** + * Create mocked InputStreamCallbacks which returns a mocked S3Object. + * Raises the given runtime exception if the failure predicate returns true; + * the stream factory returns the input stream for the given attempt. + * @param ex exception to raise on failure + * @param failurePredicate predicate which, when true, triggers a failure on the given attempt. + * @param streamFactory factory for the stream to return on the given attempt. + * @return mocked object. + */ + private S3AInputStream.InputStreamCallbacks mockInputStreamCallback( + final RuntimeException ex, + final Function failurePredicate, + final Function> streamFactory) { + return new S3AInputStream.InputStreamCallbacks() { - private Integer mockedS3ObjectIndex = 0; + private int attempt = 0; @Override public ResponseInputStream getObject(GetObjectRequest request) { - // Set s3 client to return mocked s3object with defined read behavior. - mockedS3ObjectIndex++; - // open() -> lazySeek() -> reopen() - // -> getObject (mockedS3ObjectIndex=1) -> getObjectContent(objectInputStreamBad1) - // read() -> objectInputStreamBad1 throws exception - // -> onReadFailure -> close wrappedStream - // -> retry(1) -> wrappedStream==null -> reopen -> getObject (mockedS3ObjectIndex=2) - // -> getObjectContent(objectInputStreamBad2)-> objectInputStreamBad2 - // -> wrappedStream.read -> objectInputStreamBad2 throws exception - // -> onReadFailure -> close wrappedStream - // -> retry(2) -> wrappedStream==null -> reopen - // -> getObject (mockedS3ObjectIndex=3) throws exception - // -> retry(3) -> wrappedStream==null -> reopen -> getObject (mockedS3ObjectIndex=4) - // -> getObjectContent(objectInputStreamGood)-> objectInputStreamGood - // -> wrappedStream.read - if (mockedS3ObjectIndex == 3) { - throw AwsServiceException.builder() - .message("Failed to get S3Object") - .awsErrorDetails(AwsErrorDetails.builder().errorCode("test-code").build()) - .build(); + attempt++; + + if (failurePredicate.apply(attempt)) { + throw ex; + } + final Pair r = requestRange(request.range()); + final int start = r.getLeft().intValue(); + final int end = r.getRight().intValue(); + if (start < 0 || end < 0 || start > end) { + // not satisfiable + throw awsServiceException(SC_416_RANGE_NOT_SATISFIABLE); + } + + final ResponseInputStream stream = streamFactory.apply(attempt); + + // skip the given number of bytes from the start of the array; no-op if 0. + try { + stream.skip(start); + } catch (IOException e) { + throw sdkClientException(e); } - return responseInputStreams[min(mockedS3ObjectIndex, responseInputStreams.length) - 1]; + return stream; } @Override @@ -180,27 +310,41 @@ public void close() { }; } + /** + * Create an AwsServiceException with the given status code. + * + * @param status HTTP status code + * @return an exception. + */ + private static AwsServiceException awsServiceException(int status) { + return AwsServiceException.builder() + .message("Failed to get S3Object") + .statusCode(status) + .awsErrorDetails(AwsErrorDetails.builder().errorCode("test-code").build()) + .build(); + } + /** * Get mocked ResponseInputStream where we can trigger IOException to * simulate the read failure. 
* - * @param triggerFailure true when a failure injection is enabled. + * @param triggerFailure true when a failure injection is enabled in read() + * @param ioe exception to raise * @return mocked object. */ - private ResponseInputStream getMockedInputStream( - GetObjectResponse objectResponse, boolean triggerFailure) { + private ResponseInputStream mockedInputStream( + GetObjectResponse objectResponse, + boolean triggerFailure, + final IOException ioe) { FilterInputStream inputStream = new FilterInputStream(IOUtils.toInputStream(INPUT, StandardCharsets.UTF_8)) { - private final IOException exception = - new SSLException(new SocketException("Connection reset")); - @Override public int read() throws IOException { int result = super.read(); if (triggerFailure) { - throw exception; + throw ioe; } return result; } @@ -209,7 +353,7 @@ public int read() throws IOException { public int read(byte[] b, int off, int len) throws IOException { int result = super.read(b, off, len); if (triggerFailure) { - throw exception; + throw ioe; } return result; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestErrorTranslation.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestErrorTranslation.java index 0f0b2c0c34bb5..3a4994897a6b9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestErrorTranslation.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/impl/TestErrorTranslation.java @@ -67,7 +67,7 @@ public void testUnknownHostExceptionExtraction() throws Throwable { new UnknownHostException("bottom"))); final IOException ioe = intercept(UnknownHostException.class, "top", () -> { - throw maybeExtractIOException("", thrown); + throw maybeExtractIOException("", thrown, ""); }); // the wrapped exception is the top level one: no stack traces have @@ -85,7 +85,7 @@ public void testNoRouteToHostExceptionExtraction() throws Throwable { throw maybeExtractIOException("p2", sdkException("top", sdkException("middle", - new NoRouteToHostException("bottom")))); + new NoRouteToHostException("bottom"))), null); }); } @@ -96,7 +96,7 @@ public void testConnectExceptionExtraction() throws Throwable { throw maybeExtractIOException("p1", sdkException("top", sdkException("middle", - new ConnectException("bottom")))); + new ConnectException("bottom"))), null); }); } @@ -113,7 +113,7 @@ public void testUncheckedIOExceptionExtraction() throws Throwable { new UncheckedIOException( new SocketTimeoutException("bottom")))); throw maybeExtractIOException("p1", - new NoAwsCredentialsException("IamProvider", thrown.toString(), thrown)); + new NoAwsCredentialsException("IamProvider", thrown.toString(), thrown), null); }); } @@ -124,7 +124,7 @@ public void testNoConstructorExtraction() throws Throwable { throw maybeExtractIOException("p1", sdkException("top", sdkException("middle", - new NoConstructorIOE()))); + new NoConstructorIOE())), null); }); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java index 4aae84dca8e53..361c376cffd7f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java @@ -20,19 +20,29 @@ import java.io.EOFException; +import java.nio.ByteBuffer; +import java.util.Arrays; +import 
java.util.concurrent.TimeUnit; +import org.assertj.core.api.Assertions; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileRange; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.S3AInputStream; import org.apache.hadoop.fs.s3a.S3ATestUtils; +import org.apache.hadoop.fs.s3a.Statistic; import org.apache.hadoop.fs.statistics.IOStatistics; +import static org.apache.hadoop.fs.FSExceptionMessages.EOF_IN_READ_FULLY; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; import static org.apache.hadoop.fs.contract.ContractTestUtils.readStream; @@ -47,6 +57,7 @@ import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.demandStringifyIOStatistics; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.ACTION_FILE_OPENED; import static org.apache.hadoop.test.LambdaTestUtils.intercept; +import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; /** * Cost of openFile(). @@ -56,11 +67,13 @@ public class ITestS3AOpenCost extends AbstractS3ACostTest { private static final Logger LOG = LoggerFactory.getLogger(ITestS3AOpenCost.class); + public static final String TEXT = "0123456789ABCDEF"; + private Path testFile; private FileStatus testFileStatus; - private long fileLength; + private int fileLength; public ITestS3AOpenCost() { super(true); @@ -76,9 +89,9 @@ public void setup() throws Exception { S3AFileSystem fs = getFileSystem(); testFile = methodPath(); - writeTextFile(fs, testFile, "openfile", true); + writeTextFile(fs, testFile, TEXT, true); testFileStatus = fs.getFileStatus(testFile); - fileLength = testFileStatus.getLen(); + fileLength = (int)testFileStatus.getLen(); } /** @@ -137,15 +150,8 @@ public void testOpenFileShorterLength() throws Throwable { int offset = 2; long shortLen = fileLength - offset; // open the file - FSDataInputStream in2 = verifyMetrics(() -> - fs.openFile(testFile) - .must(FS_OPTION_OPENFILE_READ_POLICY, - FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL) - .mustLong(FS_OPTION_OPENFILE_LENGTH, shortLen) - .build() - .get(), - always(NO_HEAD_OR_LIST), - with(STREAM_READ_OPENED, 0)); + FSDataInputStream in2 = openFile(shortLen, + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL); // verify that the statistics are in range IOStatistics ioStatistics = extractStatistics(in2); @@ -171,39 +177,227 @@ public void testOpenFileShorterLength() throws Throwable { } @Test - public void testOpenFileLongerLength() throws Throwable { - // do a second read with the length declared as longer + public void testOpenFileLongerLengthReadFully() throws Throwable { + // do a read with the length declared as longer // than it is. // An EOF will be read on readFully(), -1 on a read() + final int extra = 10; + long longLen = fileLength + extra; + + + // assert behaviors of seeking/reading past the file length. + // there is no attempt at recovery. 
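+    // note: the declared length is only a hint; each read still issues a GET,
+    // so readFully() past the real data fails with EOFException and read() returns -1.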
+ verifyMetrics(() -> { + try (FSDataInputStream in = openFile(longLen, + FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)) { + byte[] out = new byte[(int) (longLen)]; + intercept(EOFException.class, () -> in.readFully(0, out)); + in.seek(longLen - 1); + assertEquals("read past real EOF on " + in, -1, in.read()); + return in.toString(); + } + }, + // two GET calls were made, one for readFully, + // the second on the read() past the EOF + // the operation has got as far as S3 + with(STREAM_READ_OPENED, 1 + 1)); + + // now on a new stream, try a full read from after the EOF + verifyMetrics(() -> { + try (FSDataInputStream in = + openFile(longLen, FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)) { + byte[] out = new byte[extra]; + intercept(EOFException.class, () -> in.readFully(fileLength, out)); + return in.toString(); + } + }, + // two GET calls were made, one for readFully, + // the second on the read() past the EOF + // the operation has got as far as S3 + + with(STREAM_READ_OPENED, 1)); + } + + /** + * Open a file. + * @param longLen length to declare + * @param policy read policy + * @return file handle + */ + private FSDataInputStream openFile(final long longLen, String policy) + throws Exception { S3AFileSystem fs = getFileSystem(); // set a length past the actual file length - long longLen = fileLength + 10; - FSDataInputStream in3 = verifyMetrics(() -> + return verifyMetrics(() -> fs.openFile(testFile) - .must(FS_OPTION_OPENFILE_READ_POLICY, - FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL) + .must(FS_OPTION_OPENFILE_READ_POLICY, policy) .mustLong(FS_OPTION_OPENFILE_LENGTH, longLen) .build() .get(), always(NO_HEAD_OR_LIST)); + } + + /** + * Open a file with a length declared as longer than the actual file length. + * Validate input stream.read() semantics. + */ + @Test + public void testReadPastEOF() throws Throwable { + + // set a length past the actual file length + final int extra = 10; + int longLen = fileLength + extra; + try (FSDataInputStream in = openFile(longLen, + FS_OPTION_OPENFILE_READ_POLICY_RANDOM)) { + for (int i = 0; i < fileLength; i++) { + Assertions.assertThat(in.read()) + .describedAs("read() at %d", i) + .isEqualTo(TEXT.charAt(i)); + } + } + + // now open and read after the EOF; this is + // expected to return -1 on each read; there's a GET per call. + // as the counters are updated on close(), the stream must be closed + // within the verification clause. + // note how there's no attempt to alter file expected length... + // instead the call always goes to S3. + // there's no information in the exception from the SDK + describe("reading past the end of the file"); - // assert behaviors of seeking/reading past the file length. - // there is no attempt at recovery. 
verifyMetrics(() -> { - byte[] out = new byte[(int) longLen]; - intercept(EOFException.class, - () -> in3.readFully(0, out)); - in3.seek(longLen - 1); - assertEquals("read past real EOF on " + in3, - -1, in3.read()); - in3.close(); - return in3.toString(); + try (FSDataInputStream in = + openFile(longLen, FS_OPTION_OPENFILE_READ_POLICY_RANDOM)) { + for (int i = 0; i < extra; i++) { + final int p = fileLength + i; + in.seek(p); + Assertions.assertThat(in.read()) + .describedAs("read() at %d", p) + .isEqualTo(-1); + } + return in.toString(); + } }, - // two GET calls were made, one for readFully, - // the second on the read() past the EOF - // the operation has got as far as S3 - with(STREAM_READ_OPENED, 2)); + with(Statistic.ACTION_HTTP_GET_REQUEST, extra)); + } + + /** + * Test {@code PositionedReadable.readFully()} past EOF in a file. + */ + @Test + public void testPositionedReadableReadFullyPastEOF() throws Throwable { + // now, next corner case. Do a readFully() of more bytes than the file length. + // we expect failure. + // this codepath does a GET to the end of the (expected) file length, and when + // that GET returns -1 from the read because the bytes returned is less than + // expected then the readFully call fails. + describe("PositionedReadable.readFully() past the end of the file"); + // set a length past the actual file length + final int extra = 10; + int longLen = fileLength + extra; + verifyMetrics(() -> { + try (FSDataInputStream in = + openFile(longLen, FS_OPTION_OPENFILE_READ_POLICY_RANDOM)) { + byte[] buf = new byte[(int) (longLen + 1)]; + // readFully will fail + intercept(EOFException.class, () -> { + in.readFully(0, buf); + return in; + }); + assertS3StreamClosed(in); + return "readFully past EOF"; + } + }, + with(Statistic.ACTION_HTTP_GET_REQUEST, 1)); // no attempt to re-open + } + + /** + * Test {@code PositionedReadable.read()} past EOF in a file. + */ + @Test + public void testPositionedReadableReadPastEOF() throws Throwable { + + // set a length past the actual file length + final int extra = 10; + int longLen = fileLength + extra; + + describe("PositionedReadable.read() past the end of the file"); + + verifyMetrics(() -> { + try (FSDataInputStream in = + openFile(longLen, FS_OPTION_OPENFILE_READ_POLICY_RANDOM)) { + byte[] buf = new byte[(int) (longLen + 1)]; + + // readFully will read to the end of the file + Assertions.assertThat(in.read(0, buf, 0, buf.length)) + .isEqualTo(fileLength); + assertS3StreamOpen(in); + + // now attempt to read after EOF + Assertions.assertThat(in.read(fileLength, buf, 0, buf.length)) + .describedAs("PositionedReadable.read() past EOF") + .isEqualTo(-1); + // stream is closed as part of this failure + assertS3StreamClosed(in); + return "PositionedReadable.read()) past EOF"; + } + }, + with(Statistic.ACTION_HTTP_GET_REQUEST, 1)); // no attempt to re-open + } + + /** + * Test Vector Read past EOF in a file. 
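+   * The oversized range is accepted at submission time; the EOF only surfaces
+   * asynchronously, through the range's future.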
+ * See related tests in {@code ITestS3AContractVectoredRead} + */ + @Test + public void testVectorReadPastEOF() throws Throwable { + + // set a length past the actual file length + final int extra = 10; + int longLen = fileLength + extra; + + describe("Vector read past the end of the file"); + verifyMetrics(() -> { + try (FSDataInputStream in = + openFile(longLen, FS_OPTION_OPENFILE_READ_POLICY_RANDOM)) { + assertS3StreamClosed(in); + byte[] buf = new byte[longLen]; + ByteBuffer bb = ByteBuffer.wrap(buf); + final FileRange range = FileRange.createFileRange(0, longLen); + in.readVectored(Arrays.asList(range), (i) -> bb); + interceptFuture(EOFException.class, + EOF_IN_READ_FULLY, + ContractTestUtils.VECTORED_READ_OPERATION_TEST_TIMEOUT_SECONDS, + TimeUnit.SECONDS, + range.getData()); + assertS3StreamClosed(in); + return "vector read past EOF"; + } + }, + with(Statistic.ACTION_HTTP_GET_REQUEST, 1)); + } + + /** + * Assert that the inner S3 Stream is closed. + * @param in input stream + */ + private static void assertS3StreamClosed(final FSDataInputStream in) { + S3AInputStream s3ain = (S3AInputStream) in.getWrappedStream(); + Assertions.assertThat(s3ain.isObjectStreamOpen()) + .describedAs("stream is open") + .isFalse(); + } + + /** + * Assert that the inner S3 Stream is open. + * @param in input stream + */ + private static void assertS3StreamOpen(final FSDataInputStream in) { + S3AInputStream s3ain = (S3AInputStream) in.getWrappedStream(); + Assertions.assertThat(s3ain.isObjectStreamOpen()) + .describedAs("stream is closed") + .isTrue(); } } From 6591038063cf6ebab399f99dc51f5227472b8613 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 16 Jan 2024 14:16:12 +0000 Subject: [PATCH 013/164] HADOOP-18975 S3A: Add option fs.s3a.endpoint.fips to use AWS FIPS endpoints (#6277) Adds a new option `fs.s3a.endpoint.fips` to switch the SDK client to use FIPS endpoints, as an alternative to explicitly declaring them. * The option is available as a path capability for probes. * SDK v2 itself doesn't know that some regions don't have FIPS endpoints * SDK only fails with endpoint + fips flag as a retried exception; wit this change the S3A client should fail fast. PR fails fast. * Adds a new "connecting.md" doc; moves existing docs there and restructures. 
* New Tests in ITestS3AEndpointRegion bucket-info command support: * added to list of path capabilities * added -fips flag and test for explicit probe * also now prints bucket region * and removed some of the obsolete s3guard options * updated docs Contributed by Steve Loughran --- .../org/apache/hadoop/fs/s3a/Constants.java | 9 + .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 22 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 12 + .../apache/hadoop/fs/s3a/S3ClientFactory.java | 23 + .../hadoop/fs/s3a/impl/InternalConstants.java | 2 + .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 22 +- .../markdown/tools/hadoop-aws/connecting.md | 477 ++++++++++++++++++ .../site/markdown/tools/hadoop-aws/index.md | 261 +--------- .../markdown/tools/hadoop-aws/performance.md | 1 + .../site/markdown/tools/hadoop-aws/s3guard.md | 17 +- .../fs/s3a/ITestS3ABucketExistence.java | 2 + .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 124 +++-- .../fs/s3a/s3guard/ITestS3GuardTool.java | 16 + .../src/test/resources/core-site.xml | 6 + 14 files changed, 688 insertions(+), 306 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index c1c12b5948284..e33f762cdfcf7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1335,6 +1335,15 @@ private Constants() { */ public static final String AWS_S3_DEFAULT_REGION = "us-east-2"; + /** + * Is the endpoint a FIPS endpoint? + * Can be queried as a path capability. + * Value {@value}. + */ + public static final String FIPS_ENDPOINT = "fs.s3a.endpoint.fips"; + + public static final boolean ENDPOINT_FIPS_DEFAULT = false; + /** * Require that all S3 access is made through Access Points. 
*/ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 05ac5ef921c95..0fde93e6548bb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -22,6 +22,7 @@ import java.net.URI; import java.net.URISyntaxException; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -54,6 +55,7 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_DEFAULT_REGION; import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED_DEFAULT; @@ -63,6 +65,7 @@ import static org.apache.hadoop.fs.s3a.auth.SignerFactory.createHttpSigner; import static org.apache.hadoop.fs.s3a.impl.AWSHeaders.REQUESTER_PAYS_HEADER; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.AUTH_SCHEME_AWS_SIGV_4; +import static org.apache.hadoop.util.Preconditions.checkArgument; /** @@ -102,6 +105,13 @@ public class DefaultS3ClientFactory extends Configured /** Exactly once log to inform about ignoring the AWS-SDK Warnings for CSE. */ private static final LogExactlyOnce IGNORE_CSE_WARN = new LogExactlyOnce(LOG); + /** + * Error message when an endpoint is set with FIPS enabled: {@value}. + */ + @VisibleForTesting + public static final String ERROR_ENDPOINT_WITH_FIPS = + "An endpoint cannot set when " + FIPS_ENDPOINT + " is true"; + @Override public S3Client createS3Client( final URI uri, @@ -248,6 +258,7 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration( * @param conf conf configuration object * @param S3 client builder type * @param S3 client type + * @throws IllegalArgumentException if endpoint is set when FIPS is enabled. */ private , ClientT> void configureEndpointAndRegion( BuilderT builder, S3ClientCreationParameters parameters, Configuration conf) { @@ -263,7 +274,18 @@ private , ClientT> void region = Region.of(configuredRegion); } + // FIPs? Log it, then reject any attempt to set an endpoint + final boolean fipsEnabled = parameters.isFipsEnabled(); + if (fipsEnabled) { + LOG.debug("Enabling FIPS mode"); + } + // always setting it guarantees the value is non-null, + // which tests expect. + builder.fipsEnabled(fipsEnabled); + if (endpoint != null) { + checkArgument(!fipsEnabled, + "%s : %s", ERROR_ENDPOINT_WITH_FIPS, endpoint); builder.endpointOverride(endpoint); // No region was configured, try to determine it from the endpoint. 
if (region == null) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index df7d3f1fb6891..1aad1ad2f858f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -461,6 +461,11 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, */ private boolean isMultipartCopyEnabled; + /** + * Is FIPS enabled? + */ + private boolean fipsEnabled; + /** * A cache of files that should be deleted when the FileSystem is closed * or the JVM is exited. @@ -614,6 +619,8 @@ public void initialize(URI name, Configuration originalConf) ? conf.getTrimmed(AWS_REGION) : accessPoint.getRegion(); + fipsEnabled = conf.getBoolean(FIPS_ENDPOINT, ENDPOINT_FIPS_DEFAULT); + // is this an S3Express store? s3ExpressStore = isS3ExpressStore(bucket, endpoint); @@ -1046,6 +1053,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withMultipartThreshold(multiPartThreshold) .withTransferManagerExecutor(unboundedThreadPool) .withRegion(configuredRegion) + .withFipsEnabled(fipsEnabled) .withExpressCreateSession( conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT)); @@ -5521,6 +5529,10 @@ public boolean hasPathCapability(final Path path, final String capability) case OPTIMIZED_COPY_FROM_LOCAL: return optimizedCopyFromLocal; + // probe for a fips endpoint + case FIPS_ENDPOINT: + return fipsEnabled; + default: return super.hasPathCapability(p, cap); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 305bcbb56504b..404a255528ff4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -176,6 +176,11 @@ final class S3ClientCreationParameters { */ private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT; + /** + * Is FIPS enabled? + */ + private boolean fipsEnabled; + /** * List of execution interceptors to include in the chain * of interceptors in the SDK. @@ -461,5 +466,23 @@ public String toString() { ", expressCreateSession=" + expressCreateSession + '}'; } + + /** + * Get the FIPS flag. + * @return is fips enabled + */ + public boolean isFipsEnabled() { + return fipsEnabled; + } + + /** + * Set builder value. 
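+     * Enables/disables use of the FIPS endpoint when the client is eventually built.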
+ * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withFipsEnabled(final boolean value) { + fipsEnabled = value; + return this; + } } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index 1148f6fcd4831..8ebf8c013d10a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -38,6 +38,7 @@ import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS; import static org.apache.hadoop.fs.s3a.Constants.DIRECTORY_OPERATIONS_PURGE_UPLOADS; import static org.apache.hadoop.fs.s3a.Constants.ENABLE_MULTI_DELETE; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A_CREATE_PERFORMANCE_ENABLED; import static org.apache.hadoop.fs.s3a.Constants.STORE_CAPABILITY_AWS_V2; @@ -272,6 +273,7 @@ private InternalConstants() { FS_CHECKSUMS, FS_MULTIPART_UPLOADER, DIRECTORY_LISTING_INCONSISTENT, + FIPS_ENDPOINT, // s3 specific STORE_CAPABILITY_AWS_V2, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 41251d190c442..26b6acda30906 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -357,12 +357,11 @@ public static class BucketInfo extends S3GuardTool { public static final String NAME = BUCKET_INFO; public static final String GUARDED_FLAG = "guarded"; public static final String UNGUARDED_FLAG = "unguarded"; - public static final String AUTH_FLAG = "auth"; - public static final String NONAUTH_FLAG = "nonauth"; public static final String ENCRYPTION_FLAG = "encryption"; public static final String MAGIC_FLAG = "magic"; public static final String MARKERS_FLAG = "markers"; public static final String MARKERS_AWARE = "aware"; + public static final String FIPS_FLAG = "fips"; public static final String PURPOSE = "provide/check information" + " about a specific bucket"; @@ -370,8 +369,7 @@ public static class BucketInfo extends S3GuardTool { private static final String USAGE = NAME + " [OPTIONS] s3a://BUCKET\n" + "\t" + PURPOSE + "\n\n" + "Common options:\n" - + " -" + AUTH_FLAG + " - Require the S3Guard mode to be \"authoritative\"\n" - + " -" + NONAUTH_FLAG + " - Require the S3Guard mode to be \"non-authoritative\"\n" + + " -" + FIPS_FLAG + " - Require the client is using a FIPS endpoint\n" + " -" + MAGIC_FLAG + " - Require the S3 filesystem to be support the \"magic\" committer\n" + " -" + ENCRYPTION_FLAG @@ -395,7 +393,7 @@ public static class BucketInfo extends S3GuardTool { + " directory markers are not deleted"; public BucketInfo(Configuration conf) { - super(conf, GUARDED_FLAG, UNGUARDED_FLAG, AUTH_FLAG, NONAUTH_FLAG, MAGIC_FLAG); + super(conf, GUARDED_FLAG, UNGUARDED_FLAG, FIPS_FLAG, MAGIC_FLAG); CommandFormat format = getCommandFormat(); format.addOptionWithValue(ENCRYPTION_FLAG); format.addOptionWithValue(MARKERS_FLAG); @@ -462,6 +460,10 @@ public int run(String[] args, PrintStream out) println(out, "\tEndpoint: %s=%s", 
ENDPOINT, StringUtils.isNotEmpty(endpoint) ? endpoint : "(unset)"); + String region = conf.getTrimmed(AWS_REGION, ""); + println(out, "\tRegion: %s=%s", AWS_REGION, + StringUtils.isNotEmpty(region) ? region : "(unset)"); + String encryption = printOption(out, "\tEncryption", Constants.S3_ENCRYPTION_ALGORITHM, "none"); @@ -487,12 +489,12 @@ public int run(String[] args, PrintStream out) FS_S3A_COMMITTER_NAME, COMMITTER_NAME_FILE); switch (committer) { case COMMITTER_NAME_FILE: - println(out, "The original 'file' commmitter is active" + println(out, "The original 'file' committer is active" + " -this is slow and potentially unsafe"); break; case InternalCommitterConstants.COMMITTER_NAME_STAGING: println(out, "The 'staging' committer is used " - + "-prefer the 'directory' committer"); + + "-prefer the 'magic' committer"); // fall through case COMMITTER_NAME_DIRECTORY: // fall through @@ -555,13 +557,17 @@ public int run(String[] args, PrintStream out) processMarkerOption(out, fs, getCommandFormat().getOptValue(MARKERS_FLAG)); - // and check for capabilitities + // and check for capabilities println(out, "%nStore Capabilities"); for (String capability : S3A_DYNAMIC_CAPABILITIES) { out.printf("\t%s %s%n", capability, fs.hasPathCapability(root, capability)); } println(out, ""); + + if (commands.getOpt(FIPS_FLAG) && !fs.hasPathCapability(root, FIPS_ENDPOINT)) { + throw badState("FIPS endpoint was required but the filesystem is not using it"); + } // and finally flush the output and report a success. out.flush(); return SUCCESS; diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md new file mode 100644 index 0000000000000..600e1e128a2c8 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/connecting.md @@ -0,0 +1,477 @@ + + +# Connecting to an Amazon S3 Bucket through the S3A Connector + + + + +1. This document covers how to connect to and authenticate with S3 stores, primarily AWS S3. +2. There have been changes in this mechanism between the V1 and V2 SDK, in particular specifying +the region is now preferred to specifying the regional S3 endpoint. +3. For connecting to third-party stores, please read [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document. + +## Foundational Concepts + +### AWS Regions and Availability Zones + +AWS provides storage, compute and other services around the world, in *regions*. + +Data in S3 is stored *buckets*; each bucket is a single region. + +There are some "special" regions: China, AWS GovCloud. +It is *believed* that the S3A connector works in these places, at least to the extent that nobody has complained about it not working. + +### Endpoints + +The S3A connector connects to Amazon S3 storage over HTTPS connections, either directly or through an HTTP proxy. +HTTP HEAD and GET, PUT, POST and DELETE requests are invoked to perform different read/write operations against the store. + +There are multiple ways to connect to an S3 bucket + +* To an [S3 Endpoint](https://docs.aws.amazon.com/general/latest/gr/s3.html); an HTTPS server hosted by amazon or a third party. +* To a FIPS-compliant S3 Endpoint. +* To an AWS S3 [Access Point](https://docs.aws.amazon.com/AmazonS3/latest/userguide/access-points.html). +* Through a VPC connection, [AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html). 
+* AWS [Outposts](https://aws.amazon.com/outposts/). + +The S3A connector supports all these; S3 Endpoints are the primary mechanism used -either explicitly declared or automatically determined from the declared region of the bucket. + +Not supported: +* AWS [Snowball](https://aws.amazon.com/snowball/). + +As of December 2023, AWS S3 uses Transport Layer Security (TLS) [version 1.2](https://aws.amazon.com/blogs/security/tls-1-2-required-for-aws-endpoints/) to secure the communications channel; the S3A client is does this through +the Apache [HttpClient library](https://hc.apache.org/index.html). + +### Third party stores + +Third-party stores implementing the S3 API are also supported. +These often only implement a subset of the S3 API; not all features are available. +If TLS authentication is used, then the HTTPS certificates for the private stores +_MUST_ be installed on the JVMs on hosts within the Hadoop cluster. + +See [Working with Third-party S3 Stores](third_party_stores.html) *after* reading this document. + + +## Connection Settings + +There are three core settings to connect to an S3 store, endpoint, region and whether or not to use path style access. + + +```xml + + fs.s3a.endpoint + AWS S3 endpoint to connect to. An up-to-date list is + provided in the AWS Documentation: regions and endpoints. Without this + property, the standard region (s3.amazonaws.com) is assumed. + + + + + fs.s3a.endpoint.region + REGION + AWS Region of the data + + + + fs.s3a.path.style.access + false + Enable S3 path style access by disabling the default virtual hosting behaviour. + Needed for AWS PrivateLink, S3 AccessPoints, and, generally, third party stores. + Default: false. + + +``` + +Historically the S3A connector has preferred the endpoint as defined by the option `fs.s3a.endpoint`. +With the move to the AWS V2 SDK, there is more emphasis on the region, set by the `fs.s3a.endpoint.region` option. + +Normally, declaring the region in `fs.s3a.endpoint.region` should be sufficient to set up the network connection to correctly connect to an AWS-hosted S3 store. + +### Network timeouts + +See [Timeouts](performance.html#timeouts). + +### Low-level Network Options + +```xml + + + fs.s3a.connection.maximum + 200 + Controls the maximum number of simultaneous connections to S3. + This must be bigger than the value of fs.s3a.threads.max so as to stop + threads being blocked waiting for new HTTPS connections. + + + + + fs.s3a.connection.ssl.enabled + true + + Enables or disables SSL connections to AWS services. + + + + + fs.s3a.ssl.channel.mode + Default_JSSE + + TLS implementation and cipher options. + Values: OpenSSL, Default, Default_JSSE, Default_JSSE_with_GCM + + Default_JSSE is not truly the the default JSSE implementation because + the GCM cipher is disabled when running on Java 8. However, the name + was not changed in order to preserve backwards compatibility. Instead, + new mode called Default_JSSE_with_GCM delegates to the default JSSE + implementation with no changes to the list of enabled ciphers. + + OpenSSL requires the wildfly JAR on the classpath and a compatible installation of the openssl binaries. + It is often faster than the JVM libraries, but also trickier to + use. + + + + + fs.s3a.socket.send.buffer + 8192 + + Socket send buffer hint to amazon connector. Represented in bytes. + + + + + fs.s3a.socket.recv.buffer + 8192 + + Socket receive buffer hint to amazon connector. Represented in bytes. 
+ + +``` + +### Proxy Settings + +Connections to S3A stores can be made through an HTTP or HTTPS proxy. + +```xml + + fs.s3a.proxy.host + + Hostname of the (optional) proxy server for S3 connections. + + + + + fs.s3a.proxy.ssl.enabled + false + + Does the proxy use a TLS connection? + + + + + fs.s3a.proxy.port + + Proxy server port. If this property is not set + but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with + the value of fs.s3a.connection.ssl.enabled). + + + + + fs.s3a.proxy.username + Username for authenticating with proxy server. + + + + fs.s3a.proxy.password + Password for authenticating with proxy server. + + + + fs.s3a.proxy.domain + Domain for authenticating with proxy server. + + + + fs.s3a.proxy.workstation + Workstation for authenticating with proxy server. + +``` + +Sometimes the proxy can be source of problems, especially if HTTP connections are kept +in the connection pool for some time. +Experiment with the values of `fs.s3a.connection.ttl` and `fs.s3a.connection.request.timeout` +if long-lived connections have problems. + + +## Using Per-Bucket Configuration to access data round the world + +S3 Buckets are hosted in different "regions", the default being "US-East-1". +The S3A client talks to this region by default, issuing HTTP requests +to the server `s3.amazonaws.com`. + +S3A can work with buckets from any region. Each region has its own +S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region). + +1. Applications running in EC2 infrastructure do not pay for IO to/from +*local S3 buckets*. They will be billed for access to remote buckets. Always +use local buckets and local copies of data, wherever possible. +2. With the V4 signing protocol, AWS requires the explicit region endpoint +to be used —hence S3A must be configured to use the specific endpoint. This +is done in the configuration option `fs.s3a.endpoint`. +3. All endpoints other than the default endpoint only support interaction +with buckets local to that S3 instance. +4. Standard S3 buckets support "cross-region" access where use of the original `us-east-1` + endpoint allows access to the data, but newer storage types, particularly S3 Express are + not supported. + + + +If the wrong endpoint is used, the request will fail. This may be reported as a 301/redirect error, +or as a 400 Bad Request: take these as cues to check the endpoint setting of +a bucket. + +The up to date list of regions is [Available online](https://docs.aws.amazon.com/general/latest/gr/s3.html). + +This list can be used to specify the endpoint of individual buckets, for example +for buckets in the central and EU/Ireland endpoints. + +```xml + + fs.s3a.bucket.landsat-pds.endpoint + s3-us-west-2.amazonaws.com + + + + fs.s3a.bucket.eu-dataset.endpoint + s3.eu-west-1.amazonaws.com + +``` + +Declaring the region for the data is simpler, as it avoid having to look up the full URL and having to worry about historical quirks of regional endpoint hostnames. + +```xml + + fs.s3a.bucket.landsat-pds.endpoint.region + us-west-2 + The endpoint for s3a://landsat-pds URLs + + + + fs.s3a.bucket.eu-dataset.endpoint.region + eu-west-1 + +``` + + +## AWS PrivateLink + +[AWS PrivateLink for Amazon S3](https://docs.aws.amazon.com/AmazonS3/latest/userguide/privatelink-interface-endpoints.html) allows for a private connection to a bucket to be defined, with network access rules managing how a bucket can be accessed. + + +1. Follow the documentation to create the private link +2. 
retrieve the DNS name from the console, such as `vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com` +3. Convert this to an endpoint URL by prefixing "https://bucket." +4. Declare this as the bucket endpoint and switch to path-style access. +5. Declare the region: there is no automated determination of the region from + the `vpce` URL. + +```xml + + + fs.s3a.bucket.example-usw2.endpoint + https://bucket.vpce-f264a96c-6d27bfa7c85e.s3.us-west-2.vpce.amazonaws.com/ + + + + fs.s3a.bucket.example-usw2.path.style.access + true + + + + fs.s3a.bucket.example-usw2.endpoint.region + us-west-2 + +``` + +## Federal Information Processing Standards (FIPS) Endpoints + + +It is possible to use [FIPs-compliant](https://www.nist.gov/itl/fips-general-information) endpoints which +support a restricted subset of TLS algorithms. + +Amazon provide a specific set of [FIPS endpoints](https://aws.amazon.com/compliance/fips/) +to use so callers can be confident that the network communication is compliant with the standard: +non-compliant algorithms are unavailable. + +The boolean option `fs.s3a.endpoint.fips` (default `false`) switches the S3A connector to using the FIPS endpoint of a region. + +```xml + + fs.s3a.endpoint.fips + true + Use the FIPS endpoint + +``` + +For a single bucket: +```xml + + fs.s3a.bucket.landsat-pds.endpoint.fips + true + Use the FIPS endpoint for the landsat dataset + +``` + +If this option is `true`, the endpoint option `fs.s3a.endpoint` MUST NOT be set: + +``` +A custom endpoint cannot be combined with FIPS: https://s3.eu-west-2.amazonaws.com +``` + +The SDK calculates the FIPS-specific endpoint without any awareness as to whether FIPs is supported by a region. The first attempt to interact with the service will fail + +``` +java.net.UnknownHostException: software.amazon.awssdk.core.exception.SdkClientException: +Received an UnknownHostException when attempting to interact with a service. + See cause for the exact endpoint that is failing to resolve. + If this is happening on an endpoint that previously worked, + there may be a network connectivity issue or your DNS cache + could be storing endpoints for too long.: + example-london-1.s3-fips.eu-west-2.amazonaws.com + +``` + +*Important* OpenSSL and FIPS endpoints + +Linux distributions with an FIPS-compliant SSL library may not be compatible with wildfly. +Always use with the JDK SSL implementation unless you are confident that the library +is compatible, or wish to experiment with the settings outside of production deployments. + +```xml + + fs.s3a.ssl.channel.mode + Default_JSSE + +``` + +## Configuring S3 AccessPoints usage with S3A + +S3A supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which +improves VPC integration with S3 and simplifies your data's permission model because different +policies can be applied now on the Access Point level. For more information about why to use and +how to create them make sure to read the official documentation. + +Accessing data through an access point, is done by using its ARN, as opposed to just the bucket name. +You can set the Access Point ARN property using the following per bucket configuration property: + +```xml + + fs.s3a.bucket.sample-bucket.accesspoint.arn + {ACCESSPOINT_ARN_HERE} + Configure S3a traffic to use this AccessPoint + +``` + +This configures access to the `sample-bucket` bucket for S3A, to go through the +new Access Point ARN. 
So, for example `s3a://sample-bucket/key` will now use your +configured ARN when getting data from S3 instead of your bucket. + +_the name of the bucket used in the s3a:// URLs is irrelevant; it is not used when connecting with the store_ + +Example + +```xml + + fs.s3a.bucket.example-ap.accesspoint.arn + arn:aws:s3:eu-west-2:152813717728:accesspoint/ap-example-london + AccessPoint bound to bucket name example-ap + +``` + +The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access +Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access +to known sources of data defined through Access Points. In case there is a need to access a bucket +directly (without Access Points) then you can use per bucket overrides to disable this setting on a +bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`. + +```xml + + + fs.s3a.accesspoint.required + true + + + + fs.s3a.bucket.example-bucket.accesspoint.required + false + +``` + +Before using Access Points make sure you're not impacted by the following: +- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for performance reasons; +- The endpoint for S3 requests will automatically change to use +`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While +considering endpoints, if you have any custom signers that use the host endpoint property make +sure to update them if needed; + +## Debugging network problems + +The `storediag` command within the utility [cloudstore](https://github.com/exampleoughran/cloudstore) +JAR is recommended as the way to view and print settings. + +If `storediag` doesn't connect to your S3 store, *nothing else will*. + +## Common Sources of Connection Problems + +Based on the experience of people who field support calls, here are +some of the main connectivity issues which cause problems. + +### Inconsistent configuration across a cluster + +All hosts in the cluster need to have the configuration secrets; +local environment variables are not enough. + +If HTTPS/TLS is used for a private store, the relevant certificates MUST be installed everywhere. + +For applications such as distcp, the options need to be passed with the job. + +### Confusion between public/private S3 Stores. + +If your cluster is configured to use a private store, AWS-hosted buckets are not visible. +If you wish to read access in a private store, you need to change the endpoint. + +Private S3 stores generally expect path style access. + +### Region and endpoints misconfigured + +These usually surface rapidly and with meaningful messages. + +Region errors generally surface as +* `UnknownHostException` +* `AWSRedirectException` "Received permanent redirect response to region" + +Endpoint configuration problems can be more varied, as they are just HTTPS URLs. + +### Wildfly/OpenSSL Brittleness + +When it works, it is fast. But it is fussy as to openSSL implementations, TLS protocols and more. +Because it uses the native openssl binaries, operating system updates can trigger regressions. + +Disabling it should be the first step to troubleshooting any TLS problems. + +### Proxy setup + +If there is a proxy, set it up correctly. 
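+## Checking settings from code
+
+Because connection options such as `fs.s3a.endpoint.fips` are also published as path
+capabilities, an application can verify at runtime that it is talking to the store the
+way it expects. The following is a minimal sketch rather than a recommended pattern:
+the bucket name `example-london` and its region are assumptions, and error handling
+is omitted.
+
+```java
+import java.net.URI;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+
+public final class FipsCheck {
+  public static void main(String[] args) throws Exception {
+    Configuration conf = new Configuration();
+    // per-bucket overrides: declare the region and switch to the FIPS endpoint
+    conf.set("fs.s3a.bucket.example-london.endpoint.region", "eu-west-2");
+    conf.setBoolean("fs.s3a.bucket.example-london.endpoint.fips", true);
+
+    Path root = new Path("s3a://example-london/");
+    try (FileSystem fs = FileSystem.newInstance(new URI("s3a://example-london/"), conf)) {
+      // the key used to enable FIPS doubles as the capability name
+      boolean fips = fs.hasPathCapability(root, "fs.s3a.endpoint.fips");
+      System.out.println("FIPS endpoint in use: " + fips);
+    }
+  }
+}
+```
+
+The same probe works for any capability printed in the `Store Capabilities` section of
+`hadoop s3guard bucket-info`, which is often the quicker way to inspect a live configuration.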
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index dcf3be2b08314..0f09c7f873152 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -33,6 +33,7 @@ full details. ## Documents +* [Connecting](./connecting.html) * [Encryption](./encryption.html) * [Performance](./performance.html) * [The upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html) @@ -223,6 +224,10 @@ Do not inadvertently share these credentials through means such as: If you do any of these: change your credentials immediately! +## Connecting to Amazon S3 or a third-party store + +See [Connecting to an Amazon S3 Bucket through the S3A Connector](connecting.md). + ## Authenticating with S3 Except when interacting with public S3 buckets, the S3A client @@ -835,61 +840,15 @@ Here are some the S3A properties for use in production. - fs.s3a.connection.ssl.enabled - true - Enables or disables SSL connections to AWS services. - Also sets the default port to use for the s3a proxy settings, - when not explicitly set in fs.s3a.proxy.port. - - - - fs.s3a.endpoint - AWS S3 endpoint to connect to. An up-to-date list is - provided in the AWS Documentation: regions and endpoints. Without this - property, the standard region (s3.amazonaws.com) is assumed. + fs.s3a.connection.maximum + 96 + Controls the maximum number of simultaneous connections to S3. + This must be bigger than the value of fs.s3a.threads.max so as to stop + threads being blocked waiting for new HTTPS connections. + Why not equal? The AWS SDK transfer manager also uses these connections. - - fs.s3a.path.style.access - false - Enable S3 path style access ie disabling the default virtual hosting behaviour. - Useful for S3A-compliant storage providers as it removes the need to set up DNS for virtual hosting. - - - - - fs.s3a.proxy.host - Hostname of the (optional) proxy server for S3 connections. - - - - fs.s3a.proxy.port - Proxy server port. If this property is not set - but fs.s3a.proxy.host is, port 80 or 443 is assumed (consistent with - the value of fs.s3a.connection.ssl.enabled). - - - - fs.s3a.proxy.username - Username for authenticating with proxy server. - - - - fs.s3a.proxy.password - Password for authenticating with proxy server. - - - - fs.s3a.proxy.domain - Domain for authenticating with proxy server. - - - - fs.s3a.proxy.workstation - Workstation for authenticating with proxy server. - - fs.s3a.attempts.maximum 5 @@ -1005,14 +964,6 @@ Here are some the S3A properties for use in production. implementations can still be used - - fs.s3a.accesspoint.required - false - Require that all S3 access is made through Access Points and not through - buckets directly. If enabled, use per-bucket overrides to allow bucket access to a specific set - of buckets. - - fs.s3a.block.size 32M @@ -1218,23 +1169,6 @@ Here are some the S3A properties for use in production. - - fs.s3a.connection.request.timeout - 0 - - Time out on HTTP requests to the AWS service; 0 means no timeout. - Measured in seconds; the usual time suffixes are all supported - - Important: this is the maximum duration of any AWS service call, - including upload and copy operations. If non-zero, it must be larger - than the time to upload multi-megabyte blocks to S3 from the client, - and to rename many-GB files. Use with care. 
- - Values that are larger than Integer.MAX_VALUE milliseconds are - converged to Integer.MAX_VALUE milliseconds - - - fs.s3a.etag.checksum.enabled false @@ -1699,179 +1633,6 @@ For a site configuration of: The bucket "nightly" will be encrypted with SSE-KMS using the KMS key `arn:aws:kms:eu-west-2:1528130000000:key/753778e4-2d0f-42e6-b894-6a3ae4ea4e5f` -### Using Per-Bucket Configuration to access data round the world - -S3 Buckets are hosted in different "regions", the default being "US-East". -The S3A client talks to this region by default, issuing HTTP requests -to the server `s3.amazonaws.com`. - -S3A can work with buckets from any region. Each region has its own -S3 endpoint, documented [by Amazon](http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region). - -1. Applications running in EC2 infrastructure do not pay for IO to/from -*local S3 buckets*. They will be billed for access to remote buckets. Always -use local buckets and local copies of data, wherever possible. -1. The default S3 endpoint can support data IO with any bucket when the V1 request -signing protocol is used. -1. When the V4 signing protocol is used, AWS requires the explicit region endpoint -to be used —hence S3A must be configured to use the specific endpoint. This -is done in the configuration option `fs.s3a.endpoint`. -1. All endpoints other than the default endpoint only support interaction -with buckets local to that S3 instance. - -While it is generally simpler to use the default endpoint, working with -V4-signing-only regions (Frankfurt, Seoul) requires the endpoint to be identified. -Expect better performance from direct connections —traceroute will give you some insight. - -If the wrong endpoint is used, the request may fail. This may be reported as a 301/redirect error, -or as a 400 Bad Request: take these as cues to check the endpoint setting of -a bucket. - -Here is a list of properties defining all AWS S3 regions, current as of June 2017: - -```xml - - - central.endpoint - s3.amazonaws.com - - - - canada.endpoint - s3.ca-central-1.amazonaws.com - - - - frankfurt.endpoint - s3.eu-central-1.amazonaws.com - - - - ireland.endpoint - s3-eu-west-1.amazonaws.com - - - - london.endpoint - s3.eu-west-2.amazonaws.com - - - - mumbai.endpoint - s3.ap-south-1.amazonaws.com - - - - ohio.endpoint - s3.us-east-2.amazonaws.com - - - - oregon.endpoint - s3-us-west-2.amazonaws.com - - - - sao-paolo.endpoint - s3-sa-east-1.amazonaws.com - - - - seoul.endpoint - s3.ap-northeast-2.amazonaws.com - - - - singapore.endpoint - s3-ap-southeast-1.amazonaws.com - - - - sydney.endpoint - s3-ap-southeast-2.amazonaws.com - - - - tokyo.endpoint - s3-ap-northeast-1.amazonaws.com - - - - virginia.endpoint - ${central.endpoint} - -``` - -This list can be used to specify the endpoint of individual buckets, for example -for buckets in the central and EU/Ireland endpoints. - -```xml - - fs.s3a.bucket.landsat-pds.endpoint - ${central.endpoint} - The endpoint for s3a://landsat-pds URLs - - - - fs.s3a.bucket.eu-dataset.endpoint - ${ireland.endpoint} - The endpoint for s3a://eu-dataset URLs - -``` - -Why explicitly declare a bucket bound to the central endpoint? It ensures -that if the default endpoint is changed to a new region, data store in -US-east is still reachable. 
- -## Configuring S3 AccessPoints usage with S3A -S3a now supports [S3 Access Point](https://aws.amazon.com/s3/features/access-points/) usage which -improves VPC integration with S3 and simplifies your data's permission model because different -policies can be applied now on the Access Point level. For more information about why to use and -how to create them make sure to read the official documentation. - -Accessing data through an access point, is done by using its ARN, as opposed to just the bucket name. -You can set the Access Point ARN property using the following per bucket configuration property: -```xml - - fs.s3a.bucket.sample-bucket.accesspoint.arn - {ACCESSPOINT_ARN_HERE} - Configure S3a traffic to use this AccessPoint - -``` - -This configures access to the `sample-bucket` bucket for S3A, to go through the -new Access Point ARN. So, for example `s3a://sample-bucket/key` will now use your -configured ARN when getting data from S3 instead of your bucket. - -The `fs.s3a.accesspoint.required` property can also require all access to S3 to go through Access -Points. This has the advantage of increasing security inside a VPN / VPC as you only allow access -to known sources of data defined through Access Points. In case there is a need to access a bucket -directly (without Access Points) then you can use per bucket overrides to disable this setting on a -bucket by bucket basis i.e. `fs.s3a.bucket.{YOUR-BUCKET}.accesspoint.required`. - -```xml - - - fs.s3a.accesspoint.required - true - - - - fs.s3a.bucket.example-bucket.accesspoint.required - false - -``` - -Before using Access Points make sure you're not impacted by the following: -- `ListObjectsV1` is not supported, this is also deprecated on AWS S3 for performance reasons; -- The endpoint for S3 requests will automatically change from `s3.amazonaws.com` to use -`s3-accesspoint.REGION.amazonaws.{com | com.cn}` depending on the Access Point ARN. While -considering endpoints, if you have any custom signers that use the host endpoint property make -sure to update them if needed; - ## Requester Pays buckets S3A supports buckets with diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 37cf472277d27..4d506b6bfc491 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -218,6 +218,7 @@ everything uses the same HTTP connection pool. | `fs.s3a.executor.capacity` | `16` | Maximum threads for any single operation | | `fs.s3a.max.total.tasks` | `16` | Extra tasks which can be queued excluding prefetching operations | +### Timeouts. Network timeout options can be tuned to make the client fail faster *or* retry more. The choice is yours. Generally recovery is better, but sometimes fail-fast is more useful. diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md index c5e807c964139..af4c6a76becb1 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3guard.md @@ -132,20 +132,17 @@ This auditing information can be used to identify opportunities to reduce load. Prints and optionally checks the status of a bucket. 
```bash -hadoop s3guard bucket-info [-guarded] [-unguarded] [-auth] [-nonauth] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET +hadoop s3guard bucket-info [-fips] [-magic] [-encryption ENCRYPTION] [-markers MARKER] s3a://BUCKET ``` Options -| argument | meaning | -|-----------|-------------| -| `-guarded` | Require S3Guard to be enabled. This will now always fail | -| `-unguarded` | Require S3Guard to be disabled. This will now always succeed | -| `-auth` | Require the S3Guard mode to be "authoritative". This will now always fail | -| `-nonauth` | Require the S3Guard mode to be "non-authoritative". This will now always fail | -| `-magic` | Require the S3 filesystem to be support the "magic" committer | -| `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` | -| `-encryption ` | Require a specific encryption algorithm | +| argument | meaning | +|----------------------|---------------------------------------------------------------------| +| `-fips` | Require FIPS endopint to be in use | +| `-magic` | Require the S3 filesystem to be support the "magic" committer | +| `-markers` | Directory marker status: `aware`, `keep`, `delete`, `authoritative` | +| `-encryption ` | Require a specific encryption algorithm | The server side encryption options are not directly related to S3Guard, but it is often convenient to check them at the same time. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index ded2f0b885079..ce6d8a7e1ef6f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -39,6 +39,7 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED; import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; @@ -138,6 +139,7 @@ private Configuration createConfigurationWithProbe(final int probe) { removeBaseAndBucketOverrides(conf, S3A_BUCKET_PROBE, ENDPOINT, + FIPS_ENDPOINT, AWS_REGION, PATH_STYLE_ACCESS); conf.setInt(S3A_BUCKET_PROBE, probe); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index 5d10590dfe30f..5e6991128b201 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.net.URI; -import java.net.URISyntaxException; import java.net.UnknownHostException; import java.nio.file.AccessDeniedException; import java.util.ArrayList; @@ -36,16 +35,17 @@ import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import org.apache.hadoop.conf.Configuration; import 
org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; +import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.PATH_STYLE_ACCESS; +import static org.apache.hadoop.fs.s3a.DefaultS3ClientFactory.ERROR_ENDPOINT_WITH_FIPS; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.io.IOUtils.closeStream; -import static org.apache.hadoop.fs.s3a.Constants.CENTRAL_ENDPOINT; - import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -82,6 +82,8 @@ public class ITestS3AEndpointRegion extends AbstractS3ATestBase { private static final String VPC_ENDPOINT = "vpce-1a2b3c4d-5e6f.s3.us-west-2.vpce.amazonaws.com"; + public static final String EXCEPTION_THROWN_BY_INTERCEPTOR = "Exception thrown by interceptor"; + /** * New FS instance which will be closed in teardown. */ @@ -134,10 +136,9 @@ public void testEndpointOverride() throws Throwable { describe("Create a client with a configured endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2); + S3Client client = createS3Client(conf, AWS_ENDPOINT_TEST, null, US_EAST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -145,10 +146,9 @@ public void testCentralEndpoint() throws Throwable { describe("Create a client with the central endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1); + S3Client client = createS3Client(conf, CENTRAL_ENDPOINT, null, US_EAST_1, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -156,21 +156,40 @@ public void testWithRegionConfig() throws Throwable { describe("Create a client with a configured region"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2); + S3Client client = createS3Client(conf, null, EU_WEST_2, EU_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } + @Test + public void testWithFips() throws Throwable { + describe("Create a client with fips enabled"); + + S3Client client = createS3Client(getConfiguration(), + null, EU_WEST_2, EU_WEST_2, true); + expectInterceptorException(client); + } + + /** + * Attempting to create a client with fips enabled and an endpoint specified + * fails during client construction. 
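+   * The expected failure message is {@code ERROR_ENDPOINT_WITH_FIPS}.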
+ */ + @Test + public void testWithFipsAndEndpoint() throws Throwable { + describe("Create a client with fips and an endpoint"); + + intercept(IllegalArgumentException.class, ERROR_ENDPOINT_WITH_FIPS, () -> + createS3Client(getConfiguration(), CENTRAL_ENDPOINT, null, US_EAST_1, true)); + } + @Test public void testEUWest2Endpoint() throws Throwable { describe("Create a client with the eu west 2 endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2); + S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, null, EU_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -178,10 +197,9 @@ public void testWithRegionAndEndpointConfig() throws Throwable { describe("Test that when both region and endpoint are configured, region takes precedence"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2); + S3Client client = createS3Client(conf, EU_WEST_2_ENDPOINT, US_WEST_2, US_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -189,21 +207,43 @@ public void testWithChinaEndpoint() throws Throwable { describe("Test with a china endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1); + S3Client client = createS3Client(conf, CN_ENDPOINT, null, CN_NORTHWEST_1, false); + + expectInterceptorException(client); + } + + /** + * Expect an exception to be thrown by the interceptor with the message + * {@link #EXCEPTION_THROWN_BY_INTERCEPTOR}. + * @param client client to issue a head request against. + * @return the expected exception. + * @throws Exception any other exception. + */ + private AwsServiceException expectInterceptorException(final S3Client client) + throws Exception { + + return intercept(AwsServiceException.class, EXCEPTION_THROWN_BY_INTERCEPTOR, + () -> head(client)); + } - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + /** + * Issue a head request against the bucket. + * @param client client to use + * @return the response. 
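+   * (the interceptor fails the request before any network call is made)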
+ */ + private HeadBucketResponse head(final S3Client client) { + return client.headBucket( + HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build()); } @Test public void testWithGovCloudEndpoint() throws Throwable { - describe("Test with a gov cloud endpoint"); + describe("Test with a gov cloud endpoint; enable fips"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1); + S3Client client = createS3Client(conf, GOV_ENDPOINT, null, US_GOV_EAST_1, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } @Test @@ -212,19 +252,20 @@ public void testWithVPCE() throws Throwable { describe("Test with vpc endpoint"); Configuration conf = getConfiguration(); - S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2); + S3Client client = createS3Client(conf, VPC_ENDPOINT, null, US_WEST_2, false); - intercept(AwsServiceException.class, "Exception thrown by interceptor", () -> client.headBucket( - HeadBucketRequest.builder().bucket(getFileSystem().getBucket()).build())); + expectInterceptorException(client); } - class RegionInterceptor implements ExecutionInterceptor { - private String endpoint; - private String region; + private final class RegionInterceptor implements ExecutionInterceptor { + private final String endpoint; + private final String region; + private final boolean isFips; - RegionInterceptor(String endpoint, String region) { + RegionInterceptor(String endpoint, String region, final boolean isFips) { this.endpoint = endpoint; this.region = region; + this.isFips = isFips; } @Override @@ -249,8 +290,15 @@ public void beforeExecution(Context.BeforeExecution context, executionAttributes.getAttribute(AwsExecutionAttribute.AWS_REGION).toString()) .describedAs("Incorrect region set").isEqualTo(region); + // verify the fips state matches expectation. + Assertions.assertThat(executionAttributes.getAttribute( + AwsExecutionAttribute.FIPS_ENDPOINT_ENABLED)) + .describedAs("Incorrect FIPS flag set in execution attributes") + .isNotNull() + .isEqualTo(isFips); + // We don't actually want to make a request, so exit early. - throw AwsServiceException.builder().message("Exception thrown by interceptor").build(); + throw AwsServiceException.builder().message(EXCEPTION_THROWN_BY_INTERCEPTOR).build(); } } @@ -261,17 +309,17 @@ public void beforeExecution(Context.BeforeExecution context, * @param conf configuration to use. * @param endpoint endpoint. * @param expectedRegion the region that should be set in the client. + * @param isFips is this a FIPS endpoint? * @return the client. - * @throws URISyntaxException parse problems. 
* @throws IOException IO problems */ @SuppressWarnings("deprecation") private S3Client createS3Client(Configuration conf, - String endpoint, String configuredRegion, String expectedRegion) + String endpoint, String configuredRegion, String expectedRegion, boolean isFips) throws IOException { List interceptors = new ArrayList<>(); - interceptors.add(new RegionInterceptor(endpoint, expectedRegion)); + interceptors.add(new RegionInterceptor(endpoint, expectedRegion, isFips)); DefaultS3ClientFactory factory = new DefaultS3ClientFactory(); @@ -283,8 +331,8 @@ private S3Client createS3Client(Configuration conf, .withMetrics(new EmptyS3AStatisticsContext() .newStatisticsFromAwsSdk()) .withExecutionInterceptors(interceptors) - .withRegion(configuredRegion); - + .withRegion(configuredRegion) + .withFipsEnabled(isFips); S3Client client = factory.createS3Client( getFileSystem().getUri(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java index 28bc2a246af1a..08696ae62d249 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/s3guard/ITestS3GuardTool.java @@ -33,6 +33,8 @@ import org.apache.hadoop.test.LambdaTestUtils; import org.apache.hadoop.util.StringUtils; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; +import static org.apache.hadoop.fs.s3a.Constants.FIPS_ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.S3_ENCRYPTION_ALGORITHM; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.assertNoUploadsAt; import static org.apache.hadoop.fs.s3a.MultipartTestUtils.clearAnyUploads; @@ -97,6 +99,20 @@ public void testStoreInfo() throws Throwable { LOG.info("Exec output=\n{}", output); } + @Test + public void testStoreInfoFips() throws Throwable { + final S3AFileSystem fs = getFileSystem(); + if (!fs.hasPathCapability(new Path("/"), FIPS_ENDPOINT)) { + skip("FIPS not enabled"); + } + S3GuardTool.BucketInfo cmd = + toClose(new S3GuardTool.BucketInfo(fs.getConf())); + String output = exec(cmd, cmd.getName(), + "-" + BucketInfo.FIPS_FLAG, + fs.getUri().toString()); + LOG.info("Exec output=\n{}", output); + } + private final static String UPLOAD_NAME = "test-upload"; @Test diff --git a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml index f871369ed5715..c99d7d43134cb 100644 --- a/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml +++ b/hadoop-tools/hadoop-aws/src/test/resources/core-site.xml @@ -56,6 +56,12 @@ Do not add the referrer header to landsat operations + + fs.s3a.bucket.landsat-pds.endpoint.fips + true + Use the fips endpoint + + fs.s3a.bucket.usgs-landsat.endpoint.region From 736ea5e04b5af9e42081d2c7d5c381b69413b7d1 Mon Sep 17 00:00:00 2001 From: Mukund Thakur Date: Tue, 16 Jan 2024 17:06:28 -0600 Subject: [PATCH 014/164] HADOOP-19015. Increase fs.s3a.connection.maximum to 500 to minimize risk of Timeout waiting for connection from pool. (#6372) HADOOP-19015. 
Increase fs.s3a.connection.maximum to 500 to minimize the risk of Timeout waiting for connection from the pool Contributed By: Mukund Thakur --- .../hadoop-common/src/main/resources/core-default.xml | 3 ++- .../src/main/java/org/apache/hadoop/fs/s3a/Constants.java | 2 +- .../src/site/markdown/tools/hadoop-aws/performance.md | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 5a5171056d048..29ec06db65989 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1530,7 +1530,7 @@ fs.s3a.connection.maximum - 200 + 500 Controls the maximum number of simultaneous connections to S3. This must be bigger than the value of fs.s3a.threads.max so as to stop threads being blocked waiting for new HTTPS connections. @@ -1538,6 +1538,7 @@ + fs.s3a.connection.ssl.enabled true diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index e33f762cdfcf7..636438afef25c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -173,7 +173,7 @@ private Constants() { * Future releases are likely to increase this value. * Keep in sync with the value in {@code core-default.xml} */ - public static final int DEFAULT_MAXIMUM_CONNECTIONS = 200; + public static final int DEFAULT_MAXIMUM_CONNECTIONS = 500; /** * Configuration option to configure expiration time of diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md index 4d506b6bfc491..4c03cca17161f 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/performance.md @@ -226,7 +226,7 @@ The choice is yours. Generally recovery is better, but sometimes fail-fast is mo | Property | Default | V2 | Meaning | |-----------------------------------------|---------|:----|-------------------------------------------------------| -| `fs.s3a.connection.maximum` | `200` | | Connection pool size | +| `fs.s3a.connection.maximum` | `500` | | Connection pool size | | `fs.s3a.connection.keepalive` | `false` | `*` | Use TCP keepalive on open channels | | `fs.s3a.connection.acquisition.timeout` | `60s` | `*` | Timeout for waiting for a connection from the pool. | | `fs.s3a.connection.establish.timeout` | `30s` | | Time to establish the TCP/TLS connection | From e13bd988bda7a9f45b00eea3e7089ccf8fabb5fe Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 17 Jan 2024 18:34:14 +0000 Subject: [PATCH 015/164] HADOOP-19033. S3A: disable checksums when fs.s3a.checksum.validation = false (#6441) Add new option fs.s3a.checksum.validation, default false, which is used when creating s3 clients to enable/disable checksum validation. When false, GET response processing is measurably faster. Contributed by Steve Loughran. 
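As a minimal sketch of how the new option is used, assuming only the `fs.s3a.checksum.validation` name added by this change (the bucket URI below is illustrative), a client can opt back into validation like this:

```java
import java.net.URI;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ChecksumValidationSketch {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Re-enable SDK checksum validation on downloads; the default added by
    // this change is false, which gives faster GET response processing.
    conf.setBoolean("fs.s3a.checksum.validation", true);
    try (FileSystem fs = FileSystem.get(new URI("s3a://example-bucket/"), conf)) {
      // Reads through this instance validate checksums, at some read cost.
      fs.getFileStatus(new Path("/"));
    }
  }
}
```

Leaving the option at its default keeps validation disabled when the client is built, as the factory change in the diff below shows.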
--- .../org/apache/hadoop/fs/s3a/Constants.java | 15 ++++++ .../hadoop/fs/s3a/DefaultS3ClientFactory.java | 15 ++++-- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +- .../apache/hadoop/fs/s3a/S3AInputStream.java | 11 ++++ .../apache/hadoop/fs/s3a/S3ClientFactory.java | 20 +++++++ .../apache/hadoop/fs/s3a/S3ATestUtils.java | 54 +++++++++++++++++++ .../fs/s3a/performance/ITestS3AOpenCost.java | 47 ++++++++++++++++ .../performance/ITestUnbufferDraining.java | 22 +++++++- 8 files changed, 180 insertions(+), 8 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 636438afef25c..4408cf68a451e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -1568,4 +1568,19 @@ private Constants() { * is true: {@value}. */ public static final String HTTP_SIGNER_CLASS_NAME = "fs.s3a.http.signer.class"; + + /** + * Should checksums be validated on download? + * This is slower and not needed on TLS connections. + * Value: {@value}. + */ + public static final String CHECKSUM_VALIDATION = + "fs.s3a.checksum.validation"; + + /** + * Default value of {@link #CHECKSUM_VALIDATION}. + * Value: {@value}. + */ + public static final boolean CHECKSUM_VALIDATION_DEFAULT = false; + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java index 0fde93e6548bb..0a3267a9fe51d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/DefaultS3ClientFactory.java @@ -179,11 +179,15 @@ private , ClientT> Build configureEndpointAndRegion(builder, parameters, conf); S3Configuration serviceConfiguration = S3Configuration.builder() - .pathStyleAccessEnabled(parameters.isPathStyleAccess()) - .build(); + .pathStyleAccessEnabled(parameters.isPathStyleAccess()) + .checksumValidationEnabled(parameters.isChecksumValidationEnabled()) + .build(); + + final ClientOverrideConfiguration.Builder override = + createClientOverrideConfiguration(parameters, conf); S3BaseClientBuilder s3BaseClientBuilder = builder - .overrideConfiguration(createClientOverrideConfiguration(parameters, conf)) + .overrideConfiguration(override.build()) .credentialsProvider(parameters.getCredentialSet()) .disableS3ExpressSessionAuth(!parameters.isExpressCreateSession()) .serviceConfiguration(serviceConfiguration); @@ -204,8 +208,9 @@ private , ClientT> Build * @throws IOException any IOE raised, or translated exception * @throws RuntimeException some failures creating an http signer * @return the override configuration + * @throws IOException any IOE raised, or translated exception */ - protected ClientOverrideConfiguration createClientOverrideConfiguration( + protected ClientOverrideConfiguration.Builder createClientOverrideConfiguration( S3ClientCreationParameters parameters, Configuration conf) throws IOException { final ClientOverrideConfiguration.Builder clientOverrideConfigBuilder = AWSClientConfig.createClientConfigBuilder(conf, AWS_SERVICE_IDENTIFIER_S3); @@ -237,7 +242,7 @@ protected ClientOverrideConfiguration createClientOverrideConfiguration( final RetryPolicy.Builder retryPolicyBuilder = AWSClientConfig.createRetryPolicyBuilder(conf); 
clientOverrideConfigBuilder.retryPolicy(retryPolicyBuilder.build()); - return clientOverrideConfigBuilder.build(); + return clientOverrideConfigBuilder; } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 1aad1ad2f858f..c5e6e09a835eb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1055,7 +1055,9 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { .withRegion(configuredRegion) .withFipsEnabled(fipsEnabled) .withExpressCreateSession( - conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT)); + conf.getBoolean(S3EXPRESS_CREATE_SESSION, S3EXPRESS_CREATE_SESSION_DEFAULT)) + .withChecksumValidationEnabled( + conf.getBoolean(CHECKSUM_VALIDATION, CHECKSUM_VALIDATION_DEFAULT)); S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf); s3Client = clientFactory.createS3Client(getUri(), parameters); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java index 3d2ecc77376bf..9f04e11d945a8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInputStream.java @@ -1304,6 +1304,17 @@ public IOStatistics getIOStatistics() { return ioStatistics; } + /** + * Get the wrapped stream. + * This is for testing only. + * + * @return the wrapped stream, or null if there is none. + */ + @VisibleForTesting + public ResponseInputStream getWrappedStream() { + return wrappedStream; + } + /** * Callbacks for input stream IO. */ diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java index 404a255528ff4..0b01876ae504f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ClientFactory.java @@ -176,6 +176,11 @@ final class S3ClientCreationParameters { */ private boolean expressCreateSession = S3EXPRESS_CREATE_SESSION_DEFAULT; + /** + * Enable checksum validation. + */ + private boolean checksumValidationEnabled; + /** * Is FIPS enabled? */ @@ -451,6 +456,20 @@ public S3ClientCreationParameters withExpressCreateSession(final boolean value) return this; } + /** + * Set builder value. 
+ * @param value new value + * @return the builder + */ + public S3ClientCreationParameters withChecksumValidationEnabled(final boolean value) { + checksumValidationEnabled = value; + return this; + } + + public boolean isChecksumValidationEnabled() { + return checksumValidationEnabled; + } + @Override public String toString() { return "S3ClientCreationParameters{" + @@ -464,6 +483,7 @@ public String toString() { ", multipartCopy=" + multipartCopy + ", region='" + region + '\'' + ", expressCreateSession=" + expressCreateSession + + ", checksumValidationEnabled=" + checksumValidationEnabled + '}'; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index 6dc3ca11028a6..ed1fda316dfe5 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -73,13 +73,19 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import software.amazon.awssdk.core.ResponseInputStream; import software.amazon.awssdk.core.exception.SdkClientException; +import software.amazon.awssdk.core.internal.io.ChecksumValidatingInputStream; +import software.amazon.awssdk.services.s3.internal.checksums.S3ChecksumValidatingInputStream; +import software.amazon.awssdk.services.s3.model.GetObjectResponse; import java.io.Closeable; import java.io.File; +import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UncheckedIOException; +import java.lang.reflect.Field; import java.net.URI; import java.net.URISyntaxException; import java.nio.charset.StandardCharsets; @@ -1663,6 +1669,54 @@ public static S3AInputStream getS3AInputStream( } } + /** + * Get the inner stream of a FilterInputStream. + * Uses reflection to access a protected field. + * @param fis input stream. + * @return the inner stream. + */ + public static InputStream getInnerStream(FilterInputStream fis) { + try { + final Field field = FilterInputStream.class.getDeclaredField("in"); + field.setAccessible(true); + return (InputStream) field.get(fis); + } catch (IllegalAccessException | NoSuchFieldException e) { + throw new AssertionError("Failed to get inner stream: " + e, e); + } + } + + /** + * Get the innermost stream of a chain of FilterInputStreams. + * This allows tests into the internals of an AWS SDK stream chain. + * @param fis input stream. + * @return the inner stream. + */ + public static InputStream getInnermostStream(FilterInputStream fis) { + InputStream inner = fis; + while (inner instanceof FilterInputStream) { + inner = getInnerStream((FilterInputStream) inner); + } + return inner; + } + + /** + * Verify that an s3a stream is not checksummed. + * The inner stream must be active. 
+ */ + public static void assertStreamIsNotChecksummed(final S3AInputStream wrappedS3A) { + final ResponseInputStream wrappedStream = + wrappedS3A.getWrappedStream(); + Assertions.assertThat(wrappedStream) + .describedAs("wrapped stream is not open: call read() on %s", wrappedS3A) + .isNotNull(); + + final InputStream inner = getInnermostStream(wrappedStream); + Assertions.assertThat(inner) + .describedAs("innermost stream of %s", wrappedS3A) + .isNotInstanceOf(ChecksumValidatingInputStream.class) + .isNotInstanceOf(S3ChecksumValidatingInputStream.class); + } + /** * Disable Prefetching streams from S3AFileSystem in tests. * @param conf Configuration to remove the prefetch property from. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java index 361c376cffd7f..63b25f9c8874b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestS3AOpenCost.java @@ -20,6 +20,7 @@ import java.io.EOFException; +import java.io.InputStream; import java.nio.ByteBuffer; import java.util.Arrays; import java.util.concurrent.TimeUnit; @@ -29,6 +30,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataInputStream; import org.apache.hadoop.fs.FileRange; import org.apache.hadoop.fs.FileStatus; @@ -45,8 +47,15 @@ import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; +import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; import static org.apache.hadoop.fs.contract.ContractTestUtils.readStream; +import static org.apache.hadoop.fs.contract.ContractTestUtils.skip; import static org.apache.hadoop.fs.contract.ContractTestUtils.writeTextFile; +import static org.apache.hadoop.fs.s3a.Constants.CHECKSUM_VALIDATION; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.assertStreamIsNotChecksummed; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getS3AInputStream; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.Statistic.STREAM_READ_BYTES_READ_CLOSE; import static org.apache.hadoop.fs.s3a.Statistic.STREAM_READ_OPENED; import static org.apache.hadoop.fs.s3a.Statistic.STREAM_READ_SEEK_BYTES_SKIPPED; @@ -79,6 +88,16 @@ public ITestS3AOpenCost() { super(true); } + @Override + public Configuration createConfiguration() { + Configuration conf = super.createConfiguration(); + removeBaseAndBucketOverrides(conf, + CHECKSUM_VALIDATION); + conf.setBoolean(CHECKSUM_VALIDATION, false); + disableFilesystemCaching(conf); + return conf; + } + /** * Setup creates a test file, saves is status and length * to fields. 
@@ -139,6 +158,34 @@ public void testOpenFileWithStatusOfOtherFS() throws Throwable { assertEquals("bytes read from file", fileLength, readLen); } + @Test + public void testStreamIsNotChecksummed() throws Throwable { + describe("Verify that an opened stream is not checksummed"); + S3AFileSystem fs = getFileSystem(); + // open the file + try (FSDataInputStream in = verifyMetrics(() -> + fs.openFile(testFile) + .must(FS_OPTION_OPENFILE_READ_POLICY, + FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE) + .mustLong(FS_OPTION_OPENFILE_LENGTH, fileLength) + .build() + .get(), + always(NO_HEAD_OR_LIST), + with(STREAM_READ_OPENED, 0))) { + + // if prefetching is enabled, skip this test + final InputStream wrapped = in.getWrappedStream(); + if (!(wrapped instanceof S3AInputStream)) { + skip("Not an S3AInputStream: " + wrapped); + } + + // open the stream. + in.read(); + // now examine the innermost stream and make sure it doesn't have a checksum + assertStreamIsNotChecksummed(getS3AInputStream(in)); + } + } + @Test public void testOpenFileShorterLength() throws Throwable { // do a second read with the length declared as short. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestUnbufferDraining.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestUnbufferDraining.java index b77ca97c7ddfa..00bae1519f5eb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestUnbufferDraining.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestUnbufferDraining.java @@ -43,6 +43,7 @@ import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE; import static org.apache.hadoop.fs.contract.ContractTestUtils.dataset; import static org.apache.hadoop.fs.s3a.Constants.ASYNC_DRAIN_THRESHOLD; +import static org.apache.hadoop.fs.s3a.Constants.CHECKSUM_VALIDATION; import static org.apache.hadoop.fs.s3a.Constants.ESTABLISH_TIMEOUT; import static org.apache.hadoop.fs.s3a.Constants.INPUT_FADVISE; import static org.apache.hadoop.fs.s3a.Constants.MAXIMUM_CONNECTIONS; @@ -84,6 +85,11 @@ public class ITestUnbufferDraining extends AbstractS3ACostTest { */ public static final int ATTEMPTS = 10; + /** + * Should checksums be enabled? + */ + public static final boolean CHECKSUMS = false; + /** * Test FS with a tiny connection pool and * no recovery. 
@@ -102,6 +108,7 @@ public Configuration createConfiguration() { Configuration conf = super.createConfiguration(); removeBaseAndBucketOverrides(conf, ASYNC_DRAIN_THRESHOLD, + CHECKSUM_VALIDATION, ESTABLISH_TIMEOUT, INPUT_FADVISE, MAX_ERROR_RETRIES, @@ -111,7 +118,7 @@ public Configuration createConfiguration() { REQUEST_TIMEOUT, RETRY_LIMIT, SOCKET_TIMEOUT); - + conf.setBoolean(CHECKSUM_VALIDATION, CHECKSUMS); return conf; } @@ -132,6 +139,7 @@ public void setup() throws Exception { conf.setInt(MAX_ERROR_RETRIES, 1); conf.setInt(READAHEAD_RANGE, READAHEAD); conf.setInt(RETRY_LIMIT, 1); + conf.setBoolean(CHECKSUM_VALIDATION, CHECKSUMS); setDurationAsSeconds(conf, ESTABLISH_TIMEOUT, Duration.ofSeconds(1)); @@ -221,12 +229,22 @@ private static long lookupCounter( */ private static void assertReadPolicy(final FSDataInputStream in, final S3AInputPolicy policy) { - S3AInputStream inner = (S3AInputStream) in.getWrappedStream(); + S3AInputStream inner = getS3AInputStream(in); Assertions.assertThat(inner.getInputPolicy()) .describedAs("input policy of %s", inner) .isEqualTo(policy); } + /** + * Extract the inner stream from an FSDataInputStream. + * Because prefetching is disabled, this is always an S3AInputStream. + * @param in input stream + * @return the inner stream cast to an S3AInputStream. + */ + private static S3AInputStream getS3AInputStream(final FSDataInputStream in) { + return (S3AInputStream) in.getWrappedStream(); + } + /** * Test stream close performance/behavior with unbuffer * aborting rather than draining. From 3f03d784dcf1d8af8962025b4a9341a8bac30a70 Mon Sep 17 00:00:00 2001 From: slfan1989 <55643692+slfan1989@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:42:21 +0800 Subject: [PATCH 016/164] HADOOP-19039. Hadoop 3.4.0 Highlight big features and improvements. (#6462) Contributed by Shilun Fan. Reviewed-by: He Xiaoqiao Signed-off-by: Shilun Fan --- hadoop-project/src/site/markdown/index.md.vm | 160 ++++++++++++------- 1 file changed, 99 insertions(+), 61 deletions(-) diff --git a/hadoop-project/src/site/markdown/index.md.vm b/hadoop-project/src/site/markdown/index.md.vm index 33c86bbc06e9a..f3f9c41deb5cd 100644 --- a/hadoop-project/src/site/markdown/index.md.vm +++ b/hadoop-project/src/site/markdown/index.md.vm @@ -15,7 +15,7 @@ Apache Hadoop ${project.version} ================================ -Apache Hadoop ${project.version} is an update to the Hadoop 3.3.x release branch. +Apache Hadoop ${project.version} is an update to the Hadoop 3.4.x release branch. Overview of Changes =================== @@ -23,86 +23,124 @@ Overview of Changes Users are encouraged to read the full set of release notes. This page provides an overview of the major changes. -Azure ABFS: Critical Stream Prefetch Fix +S3A: Upgrade AWS SDK to V2 ---------------------------------------- -The abfs has a critical bug fix -[HADOOP-18546](https://issues.apache.org/jira/browse/HADOOP-18546). -*ABFS. Disable purging list of in-progress reads in abfs stream close().* +[HADOOP-18073](https://issues.apache.org/jira/browse/HADOOP-18073) S3A: Upgrade AWS SDK to V2 -All users of the abfs connector in hadoop releases 3.3.2+ MUST either upgrade -or disable prefetching by setting `fs.azure.readaheadqueue.depth` to `0` +This release upgrade Hadoop's AWS connector S3A from AWS SDK for Java V1 to AWS SDK for Java V2. +This is a significant change which offers a number of new features including the ability to work with Amazon S3 Express One Zone Storage - the new high performance, single AZ storage class. 
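One practical consequence of the V1 to V2 move is that extension points such as credential providers are written against the `software.amazon.awssdk` packages rather than `com.amazonaws`. The following is a sketch of the general V2 provider shape only; the class name and key values are invented for illustration and are not part of the release.

```java
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;

// Illustrative only: the shape of an AWS SDK V2 credential provider.
public class ExampleV2CredentialsProvider implements AwsCredentialsProvider {
  @Override
  public AwsCredentials resolveCredentials() {
    // A real provider would obtain these from a secure source, not literals.
    return AwsBasicCredentials.create("example-access-key", "example-secret-key");
  }
}
```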
-Consult the parent JIRA [HADOOP-18521](https://issues.apache.org/jira/browse/HADOOP-18521) -*ABFS ReadBufferManager buffer sharing across concurrent HTTP requests* -for root cause analysis, details on what is affected, and mitigations. +HDFS DataNode Split one FsDatasetImpl lock to volume grain locks +---------------------------------------- + +[HDFS-15382](https://issues.apache.org/jira/browse/HDFS-15382) Split one FsDatasetImpl lock to volume grain locks. + +Throughput is one of the core performance evaluation for DataNode instance. +However, it does not reach the best performance especially for Federation deploy all the time although there are different improvement, +because of the global coarse-grain lock. +These series issues (include [HDFS-16534](https://issues.apache.org/jira/browse/HDFS-16534), [HDFS-16511](https://issues.apache.org/jira/browse/HDFS-16511), [HDFS-15382](https://issues.apache.org/jira/browse/HDFS-15382) and [HDFS-16429](https://issues.apache.org/jira/browse/HDFS-16429).) +try to split the global coarse-grain lock to fine-grain lock which is double level lock for blockpool and volume, +to improve the throughput and avoid lock impacts between blockpools and volumes. + +YARN Federation improvements +---------------------------------------- + +[YARN-5597](https://issues.apache.org/jira/browse/YARN-5597) YARN Federation improvements. + +We have enhanced the YARN Federation functionality for improved usability. The enhanced features are as follows: +1. YARN Router now boasts a full implementation of all interfaces including the ApplicationClientProtocol, ResourceManagerAdministrationProtocol, and RMWebServiceProtocol. +2. YARN Router support for application cleanup and automatic offline mechanisms for subCluster. +3. Code improvements were undertaken for the Router and AMRMProxy, along with enhancements to previously pending functionalities. +4. Audit logs and Metrics for Router received upgrades. +5. A boost in cluster security features was achieved, with the inclusion of Kerberos support. +6. The page function of the router has been enhanced. +7. A set of commands has been added to the Router side for operating on SubClusters and Policies. + +HDFS RBF: Code Enhancements, New Features, and Bug Fixes +---------------------------------------- + +The HDFS RBF functionality has undergone significant enhancements, encompassing over 200 commits for feature +improvements, new functionalities, and bug fixes. +Important features and improvements are as follows: + +**Feature** + +[HDFS-15294](https://issues.apache.org/jira/browse/HDFS-15294) HDFS Federation balance tool introduces one tool to balance data across different namespace. +**Improvement** -Vectored IO API ---------------- +[HDFS-17128](https://issues.apache.org/jira/browse/HDFS-17128) RBF: SQLDelegationTokenSecretManager should use version of tokens updated by other routers. -[HADOOP-18103](https://issues.apache.org/jira/browse/HADOOP-18103). -*High performance vectored read API in Hadoop* +The SQLDelegationTokenSecretManager enhances performance by maintaining processed tokens in memory. However, there is +a potential issue of router cache inconsistency due to token loading and renewal. This issue has been addressed by the +resolution of HDFS-17128. -The `PositionedReadable` interface has now added an operation for -Vectored IO (also known as Scatter/Gather IO): +[HDFS-17148](https://issues.apache.org/jira/browse/HDFS-17148) RBF: SQLDelegationTokenSecretManager must cleanup expired tokens in SQL. 
-```java -void readVectored(List ranges, IntFunction allocate) -``` +SQLDelegationTokenSecretManager, while fetching and temporarily storing tokens from SQL in a memory cache with a short TTL, +faces an issue where expired tokens are not efficiently cleaned up, leading to a buildup of expired tokens in the SQL database. +This issue has been addressed by the resolution of HDFS-17148. + +**Others** + +Other changes to HDFS RBF include WebUI, command line, and other improvements. Please refer to the release document. + +HDFS EC: Code Enhancements and Bug Fixes +---------------------------------------- -All the requested ranges will be retrieved into the supplied byte buffers -possibly asynchronously, -possibly in parallel, with results potentially coming in out-of-order. +HDFS EC has made code improvements and fixed some bugs. -1. The default implementation uses a series of `readFully()` calls, so delivers - equivalent performance. -2. The local filesystem uses java native IO calls for higher performance reads than `readFully()`. -3. The S3A filesystem issues parallel HTTP GET requests in different threads. +Important improvements and bugs are as follows: -Benchmarking of enhanced Apache ORC and Apache Parquet clients through `file://` and `s3a://` -show significant improvements in query performance. +**Improvement** -Further Reading: -* [FsDataInputStream](./hadoop-project-dist/hadoop-common/filesystem/fsdatainputstream.html). -* [Hadoop Vectored IO: Your Data Just Got Faster!](https://apachecon.com/acasia2022/sessions/bigdata-1148.html) - Apachecon 2022 talk. +[HDFS-16613](https://issues.apache.org/jira/browse/HDFS-16613) EC: Improve performance of decommissioning dn with many ec blocks. -Mapreduce: Manifest Committer for Azure ABFS and google GCS ----------------------------------------------------------- +In a hdfs cluster with a lot of EC blocks, decommission a dn is very slow. The reason is unlike replication blocks can be replicated +from any dn which has the same block replication, the ec block have to be replicated from the decommissioning dn. +The configurations `dfs.namenode.replication.max-streams` and `dfs.namenode.replication.max-streams-hard-limit` will limit +the replication speed, but increase these configurations will create risk to the whole cluster's network. So it should add a new +configuration to limit the decommissioning dn, distinguished from the cluster wide max-streams limit. -The new _Intermediate Manifest Committer_ uses a manifest file -to commit the work of successful task attempts, rather than -renaming directories. -Job commit is matter of reading all the manifests, creating the -destination directories (parallelized) and renaming the files, -again in parallel. +[HDFS-16663](https://issues.apache.org/jira/browse/HDFS-16663) EC: Allow block reconstruction pending timeout refreshable to increase decommission performance. -This is both fast and correct on Azure Storage and Google GCS, -and should be used there instead of the classic v1/v2 file -output committers. +In [HDFS-16613](https://issues.apache.org/jira/browse/HDFS-16613), increase the value of `dfs.namenode.replication.max-streams-hard-limit` would maximize the IO +performance of the decommissioning DN, which has a lot of EC blocks. Besides this, we also need to decrease the value of +`dfs.namenode.reconstruction.pending.timeout-sec`, default is 5 minutes, to shorten the interval time for checking +pendingReconstructions. 
Or the decommissioning node would be idle to wait for copy tasks in most of this 5 minutes. +In decommission progress, we may need to reconfigure these 2 parameters several times. In [HDFS-14560](https://issues.apache.org/jira/browse/HDFS-14560), the +`dfs.namenode.replication.max-streams-hard-limit` can already be reconfigured dynamically without namenode restart. And +the `dfs.namenode.reconstruction.pending.timeout-sec` parameter also need to be reconfigured dynamically. -It is also safe to use on HDFS, where it should be faster -than the v1 committer. It is however optimized for -cloud storage where list and rename operations are significantly -slower; the benefits may be less. +**Bug** -More details are available in the -[manifest committer](./hadoop-mapreduce-client/hadoop-mapreduce-client-core/manifest_committer.html). -documentation. +[HDFS-16456](https://issues.apache.org/jira/browse/HDFS-16456) EC: Decommission a rack with only on dn will fail when the rack number is equal with replication. +In below scenario, decommission will fail by `TOO_MANY_NODES_ON_RACK` reason: +- Enable EC policy, such as RS-6-3-1024k. +- The rack number in this cluster is equal with or less than the replication number(9) +- A rack only has one DN, and decommission this DN. +This issue has been addressed by the resolution of HDFS-16456. -HDFS: Dynamic Datanode Reconfiguration --------------------------------------- +[HDFS-17094](https://issues.apache.org/jira/browse/HDFS-17094) EC: Fix bug in block recovery when there are stale datanodes. +During block recovery, the `RecoveryTaskStriped` in the datanode expects a one-to-one correspondence between +`rBlock.getLocations()` and `rBlock.getBlockIndices()`. However, if there are stale locations during a NameNode heartbeat, +this correspondence may be disrupted. Specifically, although there are no stale locations in `recoveryLocations`, the block indices +array remains complete. This discrepancy causes `BlockRecoveryWorker.RecoveryTaskStriped#recover` to generate an incorrect +internal block ID, leading to a failure in the recovery process as the corresponding datanode cannot locate the replica. +This issue has been addressed by the resolution of HDFS-17094. -HDFS-16400, HDFS-16399, HDFS-16396, HDFS-16397, HDFS-16413, HDFS-16457. +[HDFS-17284](https://issues.apache.org/jira/browse/HDFS-17284). EC: Fix int overflow in calculating numEcReplicatedTasks and numReplicationTasks during block recovery. +Due to an integer overflow in the calculation of numReplicationTasks or numEcReplicatedTasks, the NameNode's configuration +parameter `dfs.namenode.replication.max-streams-hard-limit` failed to take effect. This led to an excessive number of tasks +being sent to the DataNodes, consequently occupying too much of their memory. -A number of Datanode configuration options can be changed without having to restart -the datanode. This makes it possible to tune deployment configurations without -cluster-wide Datanode Restarts. +This issue has been addressed by the resolution of HDFS-17284. -See [DataNode.java](https://github.com/apache/hadoop/blob/branch-3.3.5/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java#L346-L361) -for the list of dynamically reconfigurable attributes. +**Others** +Other improvements and fixes for HDFS EC, Please refer to the release document. Transitive CVE fixes -------------------- @@ -110,8 +148,8 @@ Transitive CVE fixes A lot of dependencies have been upgraded to address recent CVEs. 
Many of the CVEs were not actually exploitable through the Hadoop so much of this work is just due diligence. -However applications which have all the library is on a class path may -be vulnerable, and the ugprades should also reduce the number of false +However, applications which have all the library is on a class path may +be vulnerable, and the upgrades should also reduce the number of false positives security scanners report. We have not been able to upgrade every single dependency to the latest @@ -147,12 +185,12 @@ can, with care, keep data and computing resources private. 1. Physical cluster: *configure Hadoop security*, usually bonded to the enterprise Kerberos/Active Directory systems. Good. -1. Cloud: transient or persistent single or multiple user/tenant cluster +2. Cloud: transient or persistent single or multiple user/tenant cluster with private VLAN *and security*. Good. Consider [Apache Knox](https://knox.apache.org/) for managing remote access to the cluster. -1. Cloud: transient single user/tenant cluster with private VLAN +3. Cloud: transient single user/tenant cluster with private VLAN *and no security at all*. Requires careful network configuration as this is the sole means of securing the cluster.. From 7212fbf7ffa98f29de831b72eb4f3825babdf8f1 Mon Sep 17 00:00:00 2001 From: Benjamin Teke Date: Fri, 26 Jan 2024 06:33:55 +0100 Subject: [PATCH 017/164] HADOOP-19051: Highlight Capacity Scheduler new features in release for the release 3.4.0 (#6500) Contributed by Benjamin Teke. Signed-off-by: Shilun Fan --- hadoop-project/src/site/markdown/index.md.vm | 21 ++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/hadoop-project/src/site/markdown/index.md.vm b/hadoop-project/src/site/markdown/index.md.vm index f3f9c41deb5cd..7c1b3ef5eb402 100644 --- a/hadoop-project/src/site/markdown/index.md.vm +++ b/hadoop-project/src/site/markdown/index.md.vm @@ -57,6 +57,27 @@ We have enhanced the YARN Federation functionality for improved usability. The e 6. The page function of the router has been enhanced. 7. A set of commands has been added to the Router side for operating on SubClusters and Policies. +YARN Capacity Scheduler improvements +---------------------------------------- + +[YARN-10496](https://issues.apache.org/jira/browse/YARN-10496) Support Flexible Auto Queue Creation in Capacity Scheduler + +Capacity Scheduler resource distribution mode was extended with a new allocation mode called weight mode. +Defining queue capacities with weights allows the users to use the newly added flexible queue auto creation mode. +Flexible mode now supports the dynamic creation of both **parent queues** and **leaf queues**, enabling the creation of +complex queue hierarchies application submission time. + +[YARN-10888](https://issues.apache.org/jira/browse/YARN-10888) New capacity modes for Capacity Scheduler + +Capacity Scheduler's resource distribution was completely refactored to be more flexible and extensible. There is a new concept +called Capacity Vectors, which allows the users to mix various resource types in the hierarchy, and also in a single queue. With +this optionally enabled feature it is now possible to define different resources with different units, like memory with GBs, vcores with +percentage values, and GPUs/FPGAs with weights, all in the same queue. 
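To make the two modes concrete, the sketch below sets weight-based capacities and flexible auto queue creation programmatically. The queue names are invented, and the mixed-unit vector on the final property is an assumed illustration of the capacity-vector idea described above, not syntax quoted from these notes; verify it against the Capacity Scheduler documentation.

```java
import org.apache.hadoop.conf.Configuration;

public class CapacityModesSketch {
  public static Configuration example() {
    Configuration csConf = new Configuration();
    // Weight-based capacities: sibling queues share resources 3:1.
    csConf.set("yarn.scheduler.capacity.root.engineering.capacity", "3w");
    csConf.set("yarn.scheduler.capacity.root.adhoc.capacity", "1w");
    // Flexible auto creation of parent and leaf queues under a weighted parent.
    csConf.setBoolean(
        "yarn.scheduler.capacity.root.engineering.auto-queue-creation-v2.enabled",
        true);
    // Assumed shape of a mixed-unit capacity vector: memory in MB, vcores as a
    // percentage, GPUs by weight. Illustration only; check the documentation
    // for the exact syntax.
    csConf.set("yarn.scheduler.capacity.root.analytics.capacity",
        "[memory=4096, vcores=50%, yarn.io/gpu=1w]");
    return csConf;
  }
}
```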
+ +[YARN-10889](https://issues.apache.org/jira/browse/YARN-10889) Queue Creation in Capacity Scheduler - Various improvements + +In addition to the two new features above, there were a number of commits for improvements and bug fixes in Capacity Scheduler. + HDFS RBF: Code Enhancements, New Features, and Bug Fixes ---------------------------------------- From c08d891b9247ab2ab48a936bd0586946eb9127fe Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Sun, 21 Jan 2024 19:00:34 +0000 Subject: [PATCH 018/164] HADOOP-19046. S3A: update AWS V2 SDK to 2.23.5; v1 to 1.12.599 (#6467) This update ensures that the timeout set in fs.s3a.connection.request.timeout is passed down to calls to CreateSession made in the AWS SDK to get S3 Express session tokens. Contributed by Steve Loughran --- LICENSE-binary | 4 ++-- NOTICE-binary | 2 +- hadoop-project/pom.xml | 4 ++-- .../src/site/markdown/tools/hadoop-aws/testing.md | 13 ++++++++++--- 4 files changed, 15 insertions(+), 8 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 3720a78095635..93e38cc34ee4c 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -215,7 +215,7 @@ com.aliyun:aliyun-java-sdk-kms:2.11.0 com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 com.aliyun.oss:aliyun-sdk-oss:3.13.2 -com.amazonaws:aws-java-sdk-bundle:1.12.565 +com.amazonaws:aws-java-sdk-bundle:1.12.599 com.cedarsoftware:java-util:1.9.0 com.cedarsoftware:json-io:2.5.1 com.fasterxml.jackson.core:jackson-annotations:2.12.7 @@ -363,7 +363,7 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.4 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final -software.amazon.awssdk:bundle:jar:2.21.41 +software.amazon.awssdk:bundle:jar:2.23.5 -------------------------------------------------------------------------------- diff --git a/NOTICE-binary b/NOTICE-binary index 6db51d08b42f0..7389a31fd5a11 100644 --- a/NOTICE-binary +++ b/NOTICE-binary @@ -66,7 +66,7 @@ available from http://www.digip.org/jansson/. AWS SDK for Java -Copyright 2010-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved. +Copyright 2010-2024 Amazon.com, Inc. or its affiliates. All Rights Reserved. This product includes software developed by Amazon Technologies, Inc (http://www.amazon.com/). diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 6971960de49c2..b8fa01ce2e93a 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -186,8 +186,8 @@ 1.3.1 1.0-beta-1 900 - 1.12.565 - 2.21.41 + 1.12.599 + 2.23.5 1.0.1 2.7.1 1.11.2 diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index c2eafbcb8de28..62d449daeea56 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -1142,7 +1142,7 @@ as it may take a couple of SDK updates before it is ready. 1. Identify the latest AWS SDK [available for download](https://aws.amazon.com/sdk-for-java/). 1. Create a private git branch of trunk for JIRA, and in `hadoop-project/pom.xml` update the `aws-java-sdk.version` to the new SDK version. -1. Update AWS SDK versions in NOTICE.txt. +1. Update AWS SDK versions in NOTICE.txt and LICENSE.binary 1. Do a clean build and rerun all the `hadoop-aws` tests. This includes the `-Pscale` set, with a role defined for the assumed role tests. in `fs.s3a.assumed.role.arn` for testing assumed roles, @@ -1164,11 +1164,18 @@ your IDE or via maven. 
`mvn dependency:tree -Dverbose > target/dependencies.txt`. Examine the `target/dependencies.txt` file to verify that no new artifacts have unintentionally been declared as dependencies - of the shaded `aws-java-sdk-bundle` artifact. + of the shaded `software.amazon.awssdk:bundle:jar` artifact. 1. Run a full AWS-test suite with S3 client-side encryption enabled by setting `fs.s3a.encryption.algorithm` to 'CSE-KMS' and setting up AWS-KMS Key ID in `fs.s3a.encryption.key`. +The dependency chain of the `hadoop-aws` module should be similar to this, albeit +with different version numbers: +``` +[INFO] +- org.apache.hadoop:hadoop-aws:jar:3.4.0-SNAPSHOT:compile +[INFO] | +- software.amazon.awssdk:bundle:jar:2.23.5:compile +[INFO] | \- org.wildfly.openssl:wildfly-openssl:jar:1.1.3.Final:compile +``` ### Basic command line regression testing We need a run through of the CLI to see if there have been changes there @@ -1365,5 +1372,5 @@ Don't be surprised if this happens, don't worry too much, and, while that rollback option is there to be used, ideally try to work forwards. If the problem is with the SDK, file issues with the - [AWS SDK Bug tracker](https://github.com/aws/aws-sdk-java/issues). + [AWS V2 SDK Bug tracker](https://github.com/aws/aws-sdk-java-v2/issues). If the problem can be fixed or worked around in the Hadoop code, do it there too. From 5657c361ece6352a2de2c3acf91518d34229e6cf Mon Sep 17 00:00:00 2001 From: Pranav Saxena <108325433+saxenapranav@users.noreply.github.com> Date: Tue, 30 Jan 2024 05:17:04 -0800 Subject: [PATCH 019/164] HADOOP-18883. [ABFS]: Expect-100 JDK bug resolution: prevent multiple server calls (#6022) Address JDK bug JDK-8314978 related to handling of HTTP 100 responses. https://bugs.openjdk.org/browse/JDK-8314978 In the AbfsHttpOperation, after sendRequest() we call processResponse() method from AbfsRestOperation. Even if the conn.getOutputStream() fails due to expect-100 error, we consume the exception and let the code go ahead. This may call getHeaderField() / getHeaderFields() / getHeaderFieldLong() after getOutputStream() has failed. These invocation all lead to server calls. This commit aims to prevent this. If connection.getOutputStream() fails due to an Expect-100 error, the ABFS client does not invoke getHeaderField(), getHeaderFields(), getHeaderFieldLong() or getInputStream(). getResponseCode() is safe as on the failure it sets the responseCode variable in HttpUrlConnection object. Contributed by Pranav Saxena --- .../azurebfs/constants/AbfsHttpConstants.java | 1 + .../azurebfs/services/AbfsHttpOperation.java | 41 +++++++++++-- .../azurebfs/services/AbfsOutputStream.java | 9 ++- .../fs/azurebfs/services/ITestAbfsClient.java | 3 +- .../services/ITestAbfsOutputStream.java | 61 +++++++++++++++++++ .../services/ITestAbfsRestOperation.java | 3 +- 6 files changed, 109 insertions(+), 9 deletions(-) diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java index 91f6bddcc1d46..63de71eb178d4 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/constants/AbfsHttpConstants.java @@ -69,6 +69,7 @@ public final class AbfsHttpConstants { * and should qualify for retry. 
*/ public static final int HTTP_CONTINUE = 100; + public static final String EXPECT_100_JDK_ERROR = "Server rejected operation"; // Abfs generic constants public static final String SINGLE_WHITE_SPACE = " "; diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java index 7f5df6066f1b2..c0b554f607027 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsHttpOperation.java @@ -22,12 +22,14 @@ import java.io.InputStream; import java.io.OutputStream; import java.net.HttpURLConnection; +import java.net.ProtocolException; import java.net.URL; import java.util.List; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLSocketFactory; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.fs.azurebfs.utils.UriUtils; import org.apache.hadoop.security.ssl.DelegatingSSLSocketFactory; @@ -43,6 +45,7 @@ import org.apache.hadoop.fs.azurebfs.contracts.services.AbfsPerfLoggable; import org.apache.hadoop.fs.azurebfs.contracts.services.ListResultSchema; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; import static org.apache.hadoop.fs.azurebfs.constants.HttpHeaderConfigurations.EXPECT; @@ -83,6 +86,7 @@ public class AbfsHttpOperation implements AbfsPerfLoggable { private long sendRequestTimeMs; private long recvResponseTimeMs; private boolean shouldMask = false; + private boolean connectionDisconnectedOnError = false; public static AbfsHttpOperation getAbfsHttpOperationWithFixedResult( final URL url, @@ -324,14 +328,26 @@ public void sendRequest(byte[] buffer, int offset, int length) throws IOExceptio */ outputStream = getConnOutputStream(); } catch (IOException e) { - /* If getOutputStream fails with an exception and expect header - is enabled, we return back without throwing an exception to - the caller. The caller is responsible for setting the correct status code. - If expect header is not enabled, we throw back the exception. + connectionDisconnectedOnError = true; + /* If getOutputStream fails with an expect-100 exception , we return back + without throwing an exception to the caller. Else, we throw back the exception. */ String expectHeader = getConnProperty(EXPECT); - if (expectHeader != null && expectHeader.equals(HUNDRED_CONTINUE)) { + if (expectHeader != null && expectHeader.equals(HUNDRED_CONTINUE) + && e instanceof ProtocolException + && EXPECT_100_JDK_ERROR.equals(e.getMessage())) { LOG.debug("Getting output stream failed with expect header enabled, returning back ", e); + /* + * In case expect-100 assertion has failed, headers and inputStream should not + * be parsed. Reason being, conn.getHeaderField(), conn.getHeaderFields(), + * conn.getInputStream() will lead to repeated server call. + * ref: https://bugs.openjdk.org/browse/JDK-8314978. + * Reading conn.responseCode() and conn.getResponseMessage() is safe in + * case of Expect-100 error. Reason being, in JDK, it stores the responseCode + * in the HttpUrlConnection object before throwing exception to the caller. 
+ */ + this.statusCode = getConnResponseCode(); + this.statusDescription = getConnResponseMessage(); return; } else { LOG.debug("Getting output stream failed without expect header enabled, throwing exception ", e); @@ -364,7 +380,17 @@ public void sendRequest(byte[] buffer, int offset, int length) throws IOExceptio * @throws IOException if an error occurs. */ public void processResponse(final byte[] buffer, final int offset, final int length) throws IOException { + if (connectionDisconnectedOnError) { + LOG.debug("This connection was not successful or has been disconnected, " + + "hence not parsing headers and inputStream"); + return; + } + processConnHeadersAndInputStreams(buffer, offset, length); + } + void processConnHeadersAndInputStreams(final byte[] buffer, + final int offset, + final int length) throws IOException { // get the response long startTime = 0; startTime = System.nanoTime(); @@ -608,6 +634,11 @@ String getConnResponseMessage() throws IOException { return connection.getResponseMessage(); } + @VisibleForTesting + Boolean getConnectionDisconnectedOnError() { + return connectionDisconnectedOnError; + } + public static class AbfsHttpOperationWithFixedResult extends AbfsHttpOperation { /** * Creates an instance to represent fixed results. diff --git a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java index 5780e290a0785..74657c718a1b6 100644 --- a/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azurebfs/services/AbfsOutputStream.java @@ -338,7 +338,7 @@ private void uploadBlockAsync(DataBlocks.DataBlock blockToUpload, */ AppendRequestParameters reqParams = new AppendRequestParameters( offset, 0, bytesLength, mode, false, leaseId, isExpectHeaderEnabled); - AbfsRestOperation op = client.append(path, + AbfsRestOperation op = getClient().append(path, blockUploadData.toByteArray(), reqParams, cachedSasToken.get(), contextEncryptionAdapter, new TracingContext(tracingContext)); cachedSasToken.update(op.getSasToken()); @@ -655,7 +655,7 @@ private synchronized void flushWrittenBytesToServiceInternal(final long offset, AbfsPerfTracker tracker = client.getAbfsPerfTracker(); try (AbfsPerfInfo perfInfo = new AbfsPerfInfo(tracker, "flushWrittenBytesToServiceInternal", "flush")) { - AbfsRestOperation op = client.flush(path, offset, retainUncommitedData, + AbfsRestOperation op = getClient().flush(path, offset, retainUncommitedData, isClose, cachedSasToken.get(), leaseId, contextEncryptionAdapter, new TracingContext(tracingContext)); cachedSasToken.update(op.getSasToken()); @@ -795,4 +795,9 @@ BackReference getFsBackRef() { ListeningExecutorService getExecutorService() { return executorService; } + + @VisibleForTesting + AbfsClient getClient() { + return client; + } } diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java index d19c4470b2996..5ef835e55f419 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsClient.java @@ -48,6 +48,7 @@ import static java.net.HttpURLConnection.HTTP_NOT_FOUND; import static 
org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; @@ -586,7 +587,7 @@ public void testExpectHundredContinue() throws Exception { .getConnResponseMessage(); // Make the getOutputStream throw IOException to see it returns from the sendRequest correctly. - Mockito.doThrow(new ProtocolException("Server rejected Operation")) + Mockito.doThrow(new ProtocolException(EXPECT_100_JDK_ERROR)) .when(abfsHttpOperation) .getConnOutputStream(); diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java index eee0c177c33b3..359846ce14dae 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsOutputStream.java @@ -18,15 +18,19 @@ package org.apache.hadoop.fs.azurebfs.services; +import java.io.FileNotFoundException; import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.net.URL; import org.assertj.core.api.Assertions; import org.junit.Test; +import org.mockito.Mockito; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.azurebfs.AbstractAbfsIntegrationTest; @@ -34,6 +38,8 @@ import org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys; import org.apache.hadoop.test.LambdaTestUtils; +import static org.apache.hadoop.fs.azurebfs.constants.ConfigurationKeys.FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED; + /** * Test create operation. 
*/ @@ -148,6 +154,61 @@ public void testAbfsOutputStreamClosingFsBeforeStream() } } + @Test + public void testExpect100ContinueFailureInAppend() throws Exception { + Configuration configuration = new Configuration(getRawConfiguration()); + configuration.set(FS_AZURE_ACCOUNT_IS_EXPECT_HEADER_ENABLED, "true"); + AzureBlobFileSystem fs = (AzureBlobFileSystem) FileSystem.newInstance( + configuration); + Path path = new Path("/testFile"); + AbfsOutputStream os = Mockito.spy( + (AbfsOutputStream) fs.create(path).getWrappedStream()); + AbfsClient spiedClient = Mockito.spy(os.getClient()); + AbfsHttpOperation[] httpOpForAppendTest = new AbfsHttpOperation[2]; + mockSetupForAppend(httpOpForAppendTest, spiedClient); + Mockito.doReturn(spiedClient).when(os).getClient(); + fs.delete(path, true); + os.write(1); + LambdaTestUtils.intercept(FileNotFoundException.class, () -> { + os.close(); + }); + Assertions.assertThat(httpOpForAppendTest[0].getConnectionDisconnectedOnError()) + .describedAs("First try from AbfsClient will have expect-100 " + + "header and should fail with expect-100 error.").isTrue(); + Mockito.verify(httpOpForAppendTest[0], Mockito.times(0)) + .processConnHeadersAndInputStreams(Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt()); + + Assertions.assertThat(httpOpForAppendTest[1].getConnectionDisconnectedOnError()) + .describedAs("The retried operation from AbfsClient should not " + + "fail with expect-100 error. The retried operation does not have" + + "expect-100 header.").isFalse(); + Mockito.verify(httpOpForAppendTest[1], Mockito.times(1)) + .processConnHeadersAndInputStreams(Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt()); + } + + private void mockSetupForAppend(final AbfsHttpOperation[] httpOpForAppendTest, + final AbfsClient spiedClient) { + int[] index = new int[1]; + index[0] = 0; + Mockito.doAnswer(abfsRestOpAppendGetInvocation -> { + AbfsRestOperation op = Mockito.spy( + (AbfsRestOperation) abfsRestOpAppendGetInvocation.callRealMethod()); + Mockito.doAnswer(createHttpOpInvocation -> { + httpOpForAppendTest[index[0]] = Mockito.spy( + (AbfsHttpOperation) createHttpOpInvocation.callRealMethod()); + return httpOpForAppendTest[index[0]++]; + }).when(op).createHttpOperation(); + return op; + }) + .when(spiedClient) + .getAbfsRestOperation(Mockito.any(AbfsRestOperationType.class), + Mockito.anyString(), Mockito.any( + URL.class), Mockito.anyList(), Mockito.any(byte[].class), + Mockito.anyInt(), Mockito.anyInt(), Mockito.nullable(String.class)); + } + /** * Separate method to create an outputStream using a local FS instance so * that once this method has returned, the FS instance can be eligible for GC. 
diff --git a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java index 6574a808f92bd..16a47d15f523f 100644 --- a/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java +++ b/hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azurebfs/services/ITestAbfsRestOperation.java @@ -49,6 +49,7 @@ import static java.net.HttpURLConnection.HTTP_OK; import static java.net.HttpURLConnection.HTTP_UNAVAILABLE; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.APPEND_ACTION; +import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.EXPECT_100_JDK_ERROR; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PATCH; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HTTP_METHOD_PUT; import static org.apache.hadoop.fs.azurebfs.constants.AbfsHttpConstants.HUNDRED_CONTINUE; @@ -232,7 +233,7 @@ private AbfsRestOperation getRestOperation() throws Exception { Mockito.doReturn(responseMessage) .when(abfsHttpOperation) .getConnResponseMessage(); - Mockito.doThrow(new ProtocolException("Server rejected Operation")) + Mockito.doThrow(new ProtocolException(EXPECT_100_JDK_ERROR)) .when(abfsHttpOperation) .getConnOutputStream(); break; From f2cc36559348b052dca41e817d93f082470ed083 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 30 Jan 2024 15:32:24 +0000 Subject: [PATCH 020/164] HADOOP-19045. S3A: Validate CreateSession Timeout Propagation (#6470) New test ITestCreateSessionTimeout to verify that the duration set in fs.s3a.connection.request.timeout is passed all the way down. This is done by adding a sleep() in a custom signer and verifying that it is interrupted and that an AWSApiCallTimeoutException is raised. + Fix testRequestTimeout() * doesn't skip if considered cross-region * sets a minimum duration of 0 before invocation * resets the minimum afterwards Contributed by Steve Loughran --- .../org/apache/hadoop/fs/s3a/Constants.java | 25 ++- .../hadoop/fs/s3a/auth/CustomHttpSigner.java | 2 +- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 2 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 28 ++- .../apache/hadoop/fs/s3a/S3ATestUtils.java | 10 + .../ITestCreateSessionTimeout.java | 211 ++++++++++++++++++ 6 files changed, 262 insertions(+), 16 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateSessionTimeout.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 4408cf68a451e..744146ccf4f37 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -337,16 +337,33 @@ private Constants() { public static final int DEFAULT_SOCKET_TIMEOUT = (int)DEFAULT_SOCKET_TIMEOUT_DURATION.toMillis(); /** - * Time until a request is timed-out: {@value}. - * If zero, there is no timeout. + * How long should the SDK retry/wait on a response from an S3 store: {@value} + * including the time needed to sign the request. + *
<p>
+ * This is time to response, so for a GET request it is "time to 200 response" + * not the time limit to download the requested data. + * This makes it different from {@link #REQUEST_TIMEOUT}, which is for total + * HTTP request. + *
<p>
+ * Default unit is milliseconds. + *
<p>
+ * There is a minimum duration set in {@link #MINIMUM_NETWORK_OPERATION_DURATION}; + * it is impossible to set a delay less than this, even for testing. + * Why so? Too many deployments where the configuration assumed the timeout was in seconds + * and that "120" was a reasonable value rather than "too short to work reliably" + *
<p>
+ * Note for anyone writing tests which need to set a low value for this: + * to avoid the minimum duration overrides, call + * {@code AWSClientConfig.setMinimumOperationDuration()} and set a low value + * before creating the filesystem. */ public static final String REQUEST_TIMEOUT = "fs.s3a.connection.request.timeout"; /** - * Default duration of a request before it is timed out: Zero. + * Default duration of a request before it is timed out: 60s. */ - public static final Duration DEFAULT_REQUEST_TIMEOUT_DURATION = Duration.ZERO; + public static final Duration DEFAULT_REQUEST_TIMEOUT_DURATION = Duration.ofSeconds(60); /** * Default duration of a request before it is timed out: Zero. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java index ba1169a5e5987..528414b63e32e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CustomHttpSigner.java @@ -40,7 +40,7 @@ * fs.s3a.http.signer.class = org.apache.hadoop.fs.s3a.auth.CustomHttpSigner * */ -public final class CustomHttpSigner implements HttpSigner { +public class CustomHttpSigner implements HttpSigner { private static final Logger LOG = LoggerFactory .getLogger(CustomHttpSigner.class); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index f6da9d84e0a77..60729ac30866a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -577,7 +577,7 @@ static ClientSettings createApiConnectionSettings(Configuration conf) { /** * Build the HTTP connection settings object from the configuration. - * All settings are calculated, including the api call timeout. + * All settings are calculated. * @param conf configuration to evaluate * @return connection settings. 
*/ diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 8787fca431cc7..73bba9d62cbd8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -23,6 +23,7 @@ import java.net.ConnectException; import java.net.URI; import java.security.PrivilegedExceptionAction; +import java.time.Duration; import org.assertj.core.api.Assertions; import org.junit.Rule; @@ -49,6 +50,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.auth.STSClientFactory; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; import org.apache.hadoop.fs.s3native.S3xLoginHelper; import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.UserGroupInformation; @@ -435,16 +437,22 @@ public void testCustomUserAgent() throws Exception { @Test public void testRequestTimeout() throws Exception { conf = new Configuration(); - skipIfCrossRegionClient(conf); - conf.set(REQUEST_TIMEOUT, "120"); - fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = getS3Client("Request timeout (ms)"); - SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, - "clientConfiguration"); - assertEquals("Configured " + REQUEST_TIMEOUT + - " is different than what AWS sdk configuration uses internally", - 120000, - clientConfiguration.option(SdkClientOption.API_CALL_ATTEMPT_TIMEOUT).toMillis()); + // remove the safety check on minimum durations. + AWSClientConfig.setMinimumOperationDuration(Duration.ZERO); + try { + Duration timeout = Duration.ofSeconds(120); + conf.set(REQUEST_TIMEOUT, timeout.getSeconds() + "s"); + fs = S3ATestUtils.createTestFileSystem(conf); + S3Client s3 = getS3Client("Request timeout (ms)"); + SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, + "clientConfiguration"); + Assertions.assertThat(clientConfiguration.option(SdkClientOption.API_CALL_ATTEMPT_TIMEOUT)) + .describedAs("Configured " + REQUEST_TIMEOUT + + " is different than what AWS sdk configuration uses internally") + .isEqualTo(timeout); + } finally { + AWSClientConfig.resetMinimumOperationDuration(); + } } @Test diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java index ed1fda316dfe5..e7ea920d8a0a0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestUtils.java @@ -550,6 +550,16 @@ public static void skipIfS3ExpressBucket( !isS3ExpressTestBucket(configuration)); } + /** + * Skip a test if the test bucket is not an S3Express bucket. + * @param configuration configuration to probe + */ + public static void skipIfNotS3ExpressBucket( + Configuration configuration) { + assume("Skipping test as bucket is not an S3Express bucket", + isS3ExpressTestBucket(configuration)); + } + /** * Is the test bucket an S3Express bucket? 
* @param conf configuration diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateSessionTimeout.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateSessionTimeout.java new file mode 100644 index 0000000000000..ebd771bddb3ff --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestCreateSessionTimeout.java @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.performance; + +import java.time.Duration; +import java.util.Arrays; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +import org.assertj.core.api.Assertions; +import org.junit.Test; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import software.amazon.awssdk.http.SdkHttpRequest; +import software.amazon.awssdk.http.auth.spi.signer.AsyncSignRequest; +import software.amazon.awssdk.http.auth.spi.signer.AsyncSignedRequest; +import software.amazon.awssdk.http.auth.spi.signer.HttpSigner; +import software.amazon.awssdk.http.auth.spi.signer.SignRequest; +import software.amazon.awssdk.http.auth.spi.signer.SignedRequest; +import software.amazon.awssdk.identity.spi.AwsCredentialsIdentity; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.AWSApiCallTimeoutException; +import org.apache.hadoop.fs.s3a.S3AFileSystem; +import org.apache.hadoop.fs.s3a.auth.CustomHttpSigner; +import org.apache.hadoop.fs.s3a.impl.AWSClientConfig; +import org.apache.hadoop.util.DurationInfo; + +import static org.apache.hadoop.fs.s3a.Constants.CUSTOM_SIGNERS; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_CLASS_NAME; +import static org.apache.hadoop.fs.s3a.Constants.HTTP_SIGNER_ENABLED; +import static org.apache.hadoop.fs.s3a.Constants.REQUEST_TIMEOUT; +import static org.apache.hadoop.fs.s3a.Constants.RETRY_LIMIT; +import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; +import static org.apache.hadoop.fs.s3a.Constants.S3EXPRESS_CREATE_SESSION; +import static org.apache.hadoop.fs.s3a.Constants.SIGNING_ALGORITHM_S3; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.disableFilesystemCaching; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.skipIfNotS3ExpressBucket; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; + +/** + * Test timeout of S3 Client CreateSession call, which was originally + * hard coded to 10 seconds. + * Only executed against an S3Express store. 
+ */ +public class ITestCreateSessionTimeout extends AbstractS3ACostTest { + + private static final Logger LOG = + LoggerFactory.getLogger(ITestCreateSessionTimeout.class); + + /** + * What is the duration for the operation after which the test is considered + * to have failed because timeouts didn't get passed down? + */ + private static final long TIMEOUT_EXCEPTION_THRESHOLD = Duration.ofSeconds(5).toMillis(); + + /** + * How long to sleep in requests? + */ + private static final AtomicLong SLEEP_DURATION = new AtomicLong( + Duration.ofSeconds(20).toMillis()); + + /** + * Flag set if the sleep was interrupted during signing. + */ + private static final AtomicBoolean SLEEP_INTERRUPTED = new AtomicBoolean(false); + + /** + * Create a configuration with a 10 millisecond timeout on API calls + * and a custom signer which sleeps much longer than that. + * @return the configuration. + */ + @Override + public Configuration createConfiguration() { + final Configuration conf = super.createConfiguration(); + skipIfNotS3ExpressBucket(conf); + disableFilesystemCaching(conf); + removeBaseAndBucketOverrides(conf, + CUSTOM_SIGNERS, + HTTP_SIGNER_ENABLED, + REQUEST_TIMEOUT, + RETRY_LIMIT, + S3A_BUCKET_PROBE, + S3EXPRESS_CREATE_SESSION, + SIGNING_ALGORITHM_S3 + ); + + conf.setBoolean(HTTP_SIGNER_ENABLED, true); + conf.setClass(HTTP_SIGNER_CLASS_NAME, SlowSigner.class, HttpSigner.class); + Duration duration = Duration.ofMillis(10); + + conf.setLong(REQUEST_TIMEOUT, duration.toMillis()); + conf.setInt(RETRY_LIMIT, 1); + + return conf; + } + + @Override + public void setup() throws Exception { + // remove the safety check on minimum durations. + AWSClientConfig.setMinimumOperationDuration(Duration.ZERO); + try { + super.setup(); + } finally { + // restore the safety check on minimum durations. + AWSClientConfig.resetMinimumOperationDuration(); + } + } + + @Override + protected void deleteTestDirInTeardown() { + // no-op + } + + /** + * Make this a no-op to avoid IO. + * @param path path path + */ + @Override + protected void mkdirs(Path path) { + + } + + @Test + public void testSlowSigningTriggersTimeout() throws Throwable { + + final S3AFileSystem fs = getFileSystem(); + DurationInfo call = new DurationInfo(LOG, true, "Create session"); + final AWSApiCallTimeoutException thrown = intercept(AWSApiCallTimeoutException.class, + () -> fs.getFileStatus(path("testShortTimeout"))); + call.finished(); + LOG.info("Exception raised after {}", call, thrown); + // if the timeout took too long, fail with details and include the original + // exception + if (call.value() > TIMEOUT_EXCEPTION_THRESHOLD) { + throw new AssertionError("Duration of create session " + call.getDurationString() + + " exceeds threshold " + TIMEOUT_EXCEPTION_THRESHOLD + " ms: " + thrown, thrown); + } + Assertions.assertThat(SLEEP_INTERRUPTED.get()) + .describedAs("Sleep interrupted during signing") + .isTrue(); + + // now scan the inner exception stack for "createSession" + Arrays.stream(thrown.getCause().getStackTrace()) + .filter(e -> e.getMethodName().equals("createSession")) + .findFirst() + .orElseThrow(() -> + new AssertionError("No createSession() in inner stack trace of", thrown)); + } + + /** + * Sleep for as long as {@link #SLEEP_DURATION} requires. 
+ */ + private static void sleep() { + long sleep = SLEEP_DURATION.get(); + if (sleep > 0) { + LOG.info("Sleeping for {} ms", sleep, new Exception()); + try (DurationInfo d = new DurationInfo(LOG, true, "Sleep for %d ms", sleep)) { + Thread.sleep(sleep); + } catch (InterruptedException e) { + LOG.info("Interrupted", e); + SLEEP_INTERRUPTED.set(true); + Thread.currentThread().interrupt(); + } + } + } + + /** + * A signer which calls {@link #sleep()} before signing. + * As this signing takes place within the CreateSession Pipeline, + */ + public static class SlowSigner extends CustomHttpSigner { + + @Override + public SignedRequest sign( + final SignRequest request) { + + final SdkHttpRequest httpRequest = request.request(); + LOG.info("Signing request {}", httpRequest); + sleep(); + return super.sign(request); + } + + @Override + public CompletableFuture signAsync( + final AsyncSignRequest request) { + sleep(); + return super.signAsync(request); + } + + } +} From 91ba4848b36d0a479975679e8a0bc22d0a74a7c5 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 30 Jan 2024 16:12:27 +0000 Subject: [PATCH 021/164] HADOOP-18830. Cut S3 Select (#6144) Cut out S3 Select * leave public/unstable constants alone * s3guard tool will fail with error * s3afs. path capability will fail * openFile() will fail with specific error * s3 select doc updated * Cut eventstream jar * New test: ITestSelectUnsupported verifies new failure handling above Contributed by Steve Loughran --- hadoop-project/pom.xml | 5 - hadoop-tools/hadoop-aws/pom.xml | 5 - .../apache/hadoop/fs/s3a/S3AFileSystem.java | 129 +- .../hadoop/fs/s3a/S3ObjectAttributes.java | 2 +- .../org/apache/hadoop/fs/s3a/Statistic.java | 4 - .../hadoop/fs/s3a/WriteOperationHelper.java | 75 -- .../apache/hadoop/fs/s3a/WriteOperations.java | 29 - .../hadoop/fs/s3a/api/RequestFactory.java | 9 - .../fs/s3a/audit/AWSRequestAnalyzer.java | 8 - .../hadoop/fs/s3a/impl/ChangeTracker.java | 2 +- .../hadoop/fs/s3a/impl/InternalConstants.java | 2 - .../hadoop/fs/s3a/impl/OpenFileSupport.java | 74 +- .../fs/s3a/impl/OperationCallbacks.java | 2 +- .../fs/s3a/impl/RequestFactoryImpl.java | 15 - .../hadoop/fs/s3a/s3guard/S3GuardTool.java | 12 +- .../fs/s3a/select/BlockingEnumeration.java | 156 --- .../s3a/select/InternalSelectConstants.java | 77 -- .../hadoop/fs/s3a/select/SelectBinding.java | 428 ------- .../hadoop/fs/s3a/select/SelectConstants.java | 21 +- .../select/SelectEventStreamPublisher.java | 124 -- .../fs/s3a/select/SelectInputStream.java | 455 ------- .../s3a/select/SelectObjectContentHelper.java | 114 -- .../hadoop/fs/s3a/select/SelectTool.java | 347 ----- .../hadoop/fs/s3a/select/package-info.java | 7 +- .../markdown/tools/hadoop-aws/connecting.md | 24 +- .../markdown/tools/hadoop-aws/encryption.md | 1 - .../markdown/tools/hadoop-aws/s3_select.md | 1127 +---------------- .../site/markdown/tools/hadoop-aws/testing.md | 138 +- .../tools/hadoop-aws/third_party_stores.md | 5 - .../tools/hadoop-aws/troubleshooting_s3a.md | 4 +- .../fs/s3a/impl/TestRequestFactory.java | 1 - .../fs/s3a/select/AbstractS3SelectTest.java | 756 ----------- .../hadoop/fs/s3a/select/ITestS3Select.java | 981 -------------- .../fs/s3a/select/ITestS3SelectCLI.java | 357 ------ .../fs/s3a/select/ITestS3SelectLandsat.java | 435 ------- .../fs/s3a/select/ITestS3SelectMRJob.java | 216 ---- .../fs/s3a/select/ITestSelectUnsupported.java | 100 ++ .../hadoop/fs/s3a/select/StreamPublisher.java | 89 -- .../s3a/select/TestBlockingEnumeration.java | 200 --- .../TestSelectEventStreamPublisher.java | 
190 --- .../MinimalWriteOperationHelperCallbacks.java | 13 +- .../fs/s3a/{select => tools}/CsvFile.java | 2 +- .../src/test/resources/core-site.xml | 19 +- 43 files changed, 264 insertions(+), 6496 deletions(-) delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/InternalSelectConstants.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectTool.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/AbstractS3SelectTest.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3Select.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectCLI.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectLandsat.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestS3SelectMRJob.java create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/ITestSelectUnsupported.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestBlockingEnumeration.java delete mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java rename hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/{select => tools}/CsvFile.java (98%) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index b8fa01ce2e93a..3205e1f22c2fe 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1121,11 +1121,6 @@ - - software.amazon.eventstream - eventstream - ${aws.eventstream.version} - org.apache.mina mina-core diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index efe38a3bc9382..b15251db04cba 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -508,11 +508,6 @@ bundle compile - - software.amazon.eventstream - eventstream - test - org.assertj assertj-core diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c5e6e09a835eb..de48c2df15698 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -83,8 +83,6 @@ import software.amazon.awssdk.services.s3.model.PutObjectResponse; import software.amazon.awssdk.services.s3.model.S3Error; import software.amazon.awssdk.services.s3.model.S3Object; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; -import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import 
software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; @@ -194,8 +192,6 @@ import org.apache.hadoop.fs.s3a.commit.PutTracker; import org.apache.hadoop.fs.s3a.commit.MagicCommitIntegration; import org.apache.hadoop.fs.s3a.impl.ChangeTracker; -import org.apache.hadoop.fs.s3a.select.SelectBinding; -import org.apache.hadoop.fs.s3a.select.SelectConstants; import org.apache.hadoop.fs.s3a.s3guard.S3Guard; import org.apache.hadoop.fs.s3a.statistics.BlockOutputStreamStatistics; import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics; @@ -299,7 +295,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private S3Client s3Client; - /** Async client is used for transfer manager and s3 select. */ + /** Async client is used for transfer manager. */ private S3AsyncClient s3AsyncClient; // initial callback policy is fail-once; it's there just to assist @@ -1725,8 +1721,7 @@ public FSDataInputStream open(Path f, int bufferSize) /** * Opens an FSDataInputStream at the indicated Path. * The {@code fileInformation} parameter controls how the file - * is opened, whether it is normal vs. an S3 select call, - * can a HEAD be skipped, etc. + * is opened, can a HEAD be skipped, etc. * @param path the file to open * @param fileInformation information about the file to open * @throws IOException IO failure. @@ -1853,13 +1848,6 @@ public CompletableFuture submit(final CallableRaisingIOE operation) { private final class WriteOperationHelperCallbacksImpl implements WriteOperationHelper.WriteOperationHelperCallbacks { - @Override - public CompletableFuture selectObjectContent( - SelectObjectContentRequest request, - SelectObjectContentResponseHandler responseHandler) { - return getS3AsyncClient().selectObjectContent(request, responseHandler); - } - @Override public CompleteMultipartUploadResponse completeMultipartUpload( CompleteMultipartUploadRequest request) { @@ -1872,7 +1860,7 @@ public CompleteMultipartUploadResponse completeMultipartUpload( * using FS state as well as the status. * @param fileStatus file status. * @param auditSpan audit span. - * @return a context for read and select operations. + * @return a context for read operations. */ @VisibleForTesting protected S3AReadOpContext createReadContext( @@ -5452,13 +5440,6 @@ public boolean hasPathCapability(final Path path, final String capability) // capability depends on FS configuration return isMagicCommitEnabled(); - case SelectConstants.S3_SELECT_CAPABILITY: - // select is only supported if enabled and client side encryption is - // disabled. - return !isCSEEnabled - && SelectBinding.isSelectEnabled(getConf()) - && !s3ExpressStore; - case CommonPathCapabilities.FS_CHECKSUMS: // capability depends on FS configuration return getConf().getBoolean(ETAG_CHECKSUM_ENABLED, @@ -5572,85 +5553,6 @@ public AWSCredentialProviderList shareCredentials(final String purpose) { return credentials.share(); } - /** - * This is a proof of concept of a select API. - * @param source path to source data - * @param options request configuration from the builder. - * @param fileInformation any passed in information. 
- * @return the stream of the results - * @throws IOException IO failure - */ - @Retries.RetryTranslated - @AuditEntryPoint - private FSDataInputStream select(final Path source, - final Configuration options, - final OpenFileSupport.OpenFileInformation fileInformation) - throws IOException { - requireSelectSupport(source); - final AuditSpan auditSpan = entryPoint(OBJECT_SELECT_REQUESTS, source); - final Path path = makeQualified(source); - String expression = fileInformation.getSql(); - final S3AFileStatus fileStatus = extractOrFetchSimpleFileStatus(path, - fileInformation); - - // readahead range can be dynamically set - S3ObjectAttributes objectAttributes = createObjectAttributes( - path, fileStatus); - ChangeDetectionPolicy changePolicy = fileInformation.getChangePolicy(); - S3AReadOpContext readContext = createReadContext( - fileStatus, - auditSpan); - fileInformation.applyOptions(readContext); - - if (changePolicy.getSource() != ChangeDetectionPolicy.Source.None - && fileStatus.getEtag() != null) { - // if there is change detection, and the status includes at least an - // etag, - // check that the object metadata lines up with what is expected - // based on the object attributes (which may contain an eTag or - // versionId). - // This is because the select API doesn't offer this. - // (note: this is trouble for version checking as cannot force the old - // version in the final read; nor can we check the etag match) - ChangeTracker changeTracker = - new ChangeTracker(uri.toString(), - changePolicy, - readContext.getS3AStatisticsContext() - .newInputStreamStatistics() - .getChangeTrackerStatistics(), - objectAttributes); - - // will retry internally if wrong version detected - Invoker readInvoker = readContext.getReadInvoker(); - getObjectMetadata(path, changeTracker, readInvoker, "select"); - } - // instantiate S3 Select support using the current span - // as the active span for operations. - SelectBinding selectBinding = new SelectBinding( - createWriteOperationHelper(auditSpan)); - - // build and execute the request - return selectBinding.select( - readContext, - expression, - options, - objectAttributes); - } - - /** - * Verify the FS supports S3 Select. - * @param source source file. - * @throws UnsupportedOperationException if not. - */ - private void requireSelectSupport(final Path source) throws - UnsupportedOperationException { - if (!isCSEEnabled && !SelectBinding.isSelectEnabled(getConf())) { - - throw new UnsupportedOperationException( - SelectConstants.SELECT_UNSUPPORTED); - } - } - /** * Get the file status of the source file. * If in the fileInformation parameter return that @@ -5681,16 +5583,14 @@ private S3AFileStatus extractOrFetchSimpleFileStatus( } /** - * Initiate the open() or select() operation. + * Initiate the open() operation. * This is invoked from both the FileSystem and FileContext APIs. * It's declared as an audit entry point but the span creation is pushed - * down into the open/select methods it ultimately calls. + * down into the open operation s it ultimately calls. * @param rawPath path to the file * @param parameters open file parameters from the builder. - * @return a future which will evaluate to the opened/selected file. + * @return a future which will evaluate to the opened file. * @throws IOException failure to resolve the link. 
- * @throws PathIOException operation is a select request but S3 select is - * disabled * @throws IllegalArgumentException unknown mandatory key */ @Override @@ -5706,20 +5606,9 @@ public CompletableFuture openFileWithOptions( parameters, getDefaultBlockSize()); CompletableFuture result = new CompletableFuture<>(); - if (!fileInformation.isS3Select()) { - // normal path. - unboundedThreadPool.submit(() -> - LambdaUtils.eval(result, - () -> executeOpen(path, fileInformation))); - } else { - // it is a select statement. - // fail fast if the operation is not available - requireSelectSupport(path); - // submit the query - unboundedThreadPool.submit(() -> - LambdaUtils.eval(result, - () -> select(path, parameters.getOptions(), fileInformation))); - } + unboundedThreadPool.submit(() -> + LambdaUtils.eval(result, + () -> executeOpen(path, fileInformation))); return result; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java index 4fc5b8658b605..18912d5d3caef 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3ObjectAttributes.java @@ -25,7 +25,7 @@ /** * This class holds attributes of an object independent of the * file status type. - * It is used in {@link S3AInputStream} and the select equivalent. + * It is used in {@link S3AInputStream} and elsewhere. * as a way to reduce parameters being passed * to the constructor of such class, * and elsewhere to be a source-neutral representation of a file status. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java index 72fc75b642415..ce3af3de803a4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Statistic.java @@ -265,10 +265,6 @@ public enum Statistic { StoreStatisticNames.OBJECT_PUT_BYTES_PENDING, "number of bytes queued for upload/being actively uploaded", TYPE_GAUGE), - OBJECT_SELECT_REQUESTS( - StoreStatisticNames.OBJECT_SELECT_REQUESTS, - "Count of S3 Select requests issued", - TYPE_COUNTER), STREAM_READ_ABORTED( StreamStatisticNames.STREAM_READ_ABORTED, "Count of times the TCP stream was aborted", diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java index f2ece63a854fa..3bbe000bf5b6e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperationHelper.java @@ -22,7 +22,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.util.List; -import java.util.concurrent.CompletableFuture; import java.util.concurrent.atomic.AtomicInteger; import software.amazon.awssdk.core.sync.RequestBody; @@ -33,8 +32,6 @@ import software.amazon.awssdk.services.s3.model.MultipartUpload; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; -import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; import 
software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.slf4j.Logger; @@ -49,16 +46,11 @@ import org.apache.hadoop.fs.s3a.api.RequestFactory; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.s3a.impl.StoreContext; -import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher; -import org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper; import org.apache.hadoop.fs.s3a.statistics.S3AStatisticsContext; -import org.apache.hadoop.fs.s3a.select.SelectBinding; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; import org.apache.hadoop.fs.store.audit.AuditSpan; import org.apache.hadoop.fs.store.audit.AuditSpanSource; -import org.apache.hadoop.util.DurationInfo; import org.apache.hadoop.util.functional.CallableRaisingIOE; -import org.apache.hadoop.util.Preconditions; import static org.apache.hadoop.util.Preconditions.checkNotNull; import static org.apache.hadoop.fs.s3a.Invoker.*; @@ -82,7 +74,6 @@ *
  <li>Other low-level access to S3 functions, for private use.</li>
  *   <li>Failure handling, including converting exceptions to IOEs.</li>
  *   <li>Integration with instrumentation.</li>
- *   <li>Evolution to add more low-level operations, such as S3 select.</li>
  * </ul>
* @param request Complete multi-part upload request diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java index 0fda4921a30da..5ad9c9f9b6482 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/WriteOperations.java @@ -31,16 +31,13 @@ import software.amazon.awssdk.services.s3.model.MultipartUpload; import software.amazon.awssdk.services.s3.model.PutObjectRequest; import software.amazon.awssdk.services.s3.model.PutObjectResponse; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.UploadPartRequest; import software.amazon.awssdk.services.s3.model.UploadPartResponse; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; import org.apache.hadoop.fs.statistics.DurationTrackerFactory; -import org.apache.hadoop.fs.s3a.select.SelectEventStreamPublisher; import org.apache.hadoop.fs.store.audit.AuditSpanSource; import org.apache.hadoop.util.functional.CallableRaisingIOE; @@ -274,32 +271,6 @@ UploadPartResponse uploadPart(UploadPartRequest request, RequestBody body, */ Configuration getConf(); - /** - * Create a S3 Select request builder for the destination path. - * This does not build the query. - * @param path pre-qualified path for query - * @return the request builder - */ - SelectObjectContentRequest.Builder newSelectRequestBuilder(Path path); - - /** - * Execute an S3 Select operation. - * On a failure, the request is only logged at debug to avoid the - * select exception being printed. - * - * @param source source for selection - * @param request Select request to issue. - * @param action the action for use in exception creation - * @return response - * @throws IOException failure - */ - @Retries.RetryTranslated - SelectEventStreamPublisher select( - Path source, - SelectObjectContentRequest request, - String action) - throws IOException; - /** * Increment the write operation counter * of the filesystem. diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index 99a898f728166..73ad137a86d3c 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -37,7 +37,6 @@ import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; @@ -214,14 +213,6 @@ UploadPartRequest.Builder newUploadPartRequestBuilder( int partNumber, long size) throws PathIOException; - /** - * Create a S3 Select request builder for the destination object. - * This does not build the query. - * @param key object key - * @return the request builder - */ - SelectObjectContentRequest.Builder newSelectRequestBuilder(String key); - /** * Create the (legacy) V1 list request builder. 
* @param key key to list under diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java index 3df862055d197..e91710a0af3a0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/AWSRequestAnalyzer.java @@ -35,7 +35,6 @@ import software.amazon.awssdk.services.s3.model.ListObjectsV2Request; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.UploadPartCopyRequest; import software.amazon.awssdk.services.s3.model.UploadPartRequest; @@ -50,7 +49,6 @@ import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_DELETE_REQUEST; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_LIST_REQUEST; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_PUT_REQUEST; -import static org.apache.hadoop.fs.statistics.StoreStatisticNames.OBJECT_SELECT_REQUESTS; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.STORE_EXISTS_PROBE; /** @@ -132,12 +130,6 @@ public RequestInfo analyze(SdkRequest request) { return writing(OBJECT_PUT_REQUEST, r.key(), 0); - } else if (request instanceof SelectObjectContentRequest) { - SelectObjectContentRequest r = - (SelectObjectContentRequest) request; - return reading(OBJECT_SELECT_REQUESTS, - r.key(), - 1); } else if (request instanceof UploadPartRequest) { UploadPartRequest r = (UploadPartRequest) request; return writing(MULTIPART_UPLOAD_PART_PUT, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java index 2c9d6857b46a2..0c56ca1f308bb 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/ChangeTracker.java @@ -223,7 +223,7 @@ public void processResponse(final CopyObjectResponse copyObjectResponse) * cause. * @param e the exception * @param operation the operation performed when the exception was - * generated (e.g. "copy", "read", "select"). + * generated (e.g. "copy", "read"). * @throws RemoteFileChangedException if the remote file has changed. */ public void processException(SdkException e, String operation) throws diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java index 8ebf8c013d10a..1d12a41008b6b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InternalConstants.java @@ -113,8 +113,6 @@ private InternalConstants() { /** * The known keys used in a standard openFile call. - * if there's a select marker in there then the keyset - * used becomes that of the select operation. 
*/ @InterfaceStability.Unstable public static final Set S3A_OPENFILE_KEYS; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OpenFileSupport.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OpenFileSupport.java index 4703d63567245..b841e8f786dc4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OpenFileSupport.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OpenFileSupport.java @@ -35,8 +35,8 @@ import org.apache.hadoop.fs.s3a.S3AInputPolicy; import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus; import org.apache.hadoop.fs.s3a.S3AReadOpContext; -import org.apache.hadoop.fs.s3a.select.InternalSelectConstants; import org.apache.hadoop.fs.s3a.select.SelectConstants; +import org.apache.hadoop.fs.store.LogExactlyOnce; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE; import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH; @@ -68,6 +68,7 @@ public class OpenFileSupport { private static final Logger LOG = LoggerFactory.getLogger(OpenFileSupport.class); + public static final LogExactlyOnce LOG_NO_SQL_SELECT = new LogExactlyOnce(LOG); /** * For use when a value of an split/file length is unknown. */ @@ -153,12 +154,14 @@ public S3AReadOpContext applyDefaultOptions(S3AReadOpContext roc) { /** * Prepare to open a file from the openFile parameters. + * S3Select SQL is rejected if a mandatory opt, ignored if optional. * @param path path to the file * @param parameters open file parameters from the builder. * @param blockSize for fileStatus * @return open file options * @throws IOException failure to resolve the link. * @throws IllegalArgumentException unknown mandatory key + * @throws UnsupportedOperationException for S3 Select options. */ @SuppressWarnings("ChainOfInstanceofChecks") public OpenFileInformation prepareToOpenFile( @@ -167,21 +170,21 @@ public OpenFileInformation prepareToOpenFile( final long blockSize) throws IOException { Configuration options = parameters.getOptions(); Set mandatoryKeys = parameters.getMandatoryKeys(); - String sql = options.get(SelectConstants.SELECT_SQL, null); - boolean isSelect = sql != null; - // choice of keys depends on open type - if (isSelect) { - // S3 Select call adds a large set of supported mandatory keys - rejectUnknownMandatoryKeys( - mandatoryKeys, - InternalSelectConstants.SELECT_OPTIONS, - "for " + path + " in S3 Select operation"); - } else { - rejectUnknownMandatoryKeys( - mandatoryKeys, - InternalConstants.S3A_OPENFILE_KEYS, - "for " + path + " in non-select file I/O"); + // S3 Select is not supported in this release + if (options.get(SelectConstants.SELECT_SQL, null) != null) { + if (mandatoryKeys.contains(SelectConstants.SELECT_SQL)) { + // mandatory option: fail with a specific message. + throw new UnsupportedOperationException(SelectConstants.SELECT_UNSUPPORTED); + } else { + // optional; log once and continue + LOG_NO_SQL_SELECT.warn(SelectConstants.SELECT_UNSUPPORTED); + } } + // choice of keys depends on open type + rejectUnknownMandatoryKeys( + mandatoryKeys, + InternalConstants.S3A_OPENFILE_KEYS, + "for " + path + " in file I/O"); // where does a read end? 
long fileLength = LENGTH_UNKNOWN; @@ -281,8 +284,6 @@ public OpenFileInformation prepareToOpenFile( } return new OpenFileInformation() - .withS3Select(isSelect) - .withSql(sql) .withAsyncDrainThreshold( builderSupport.getPositiveLong(ASYNC_DRAIN_THRESHOLD, defaultReadAhead)) @@ -329,7 +330,6 @@ private S3AFileStatus createStatus(Path path, long length, long blockSize) { */ public OpenFileInformation openSimpleFile(final int bufferSize) { return new OpenFileInformation() - .withS3Select(false) .withAsyncDrainThreshold(defaultAsyncDrainThreshold) .withBufferSize(bufferSize) .withChangePolicy(changePolicy) @@ -357,15 +357,9 @@ public String toString() { */ public static final class OpenFileInformation { - /** Is this SQL? */ - private boolean isS3Select; - /** File status; may be null. */ private S3AFileStatus status; - /** SQL string if this is a SQL select file. */ - private String sql; - /** Active input policy. */ private S3AInputPolicy inputPolicy; @@ -415,18 +409,10 @@ public OpenFileInformation build() { return this; } - public boolean isS3Select() { - return isS3Select; - } - public S3AFileStatus getStatus() { return status; } - public String getSql() { - return sql; - } - public S3AInputPolicy getInputPolicy() { return inputPolicy; } @@ -454,9 +440,7 @@ public long getSplitEnd() { @Override public String toString() { return "OpenFileInformation{" + - "isSql=" + isS3Select + - ", status=" + status + - ", sql='" + sql + '\'' + + "status=" + status + ", inputPolicy=" + inputPolicy + ", changePolicy=" + changePolicy + ", readAheadRange=" + readAheadRange + @@ -475,16 +459,6 @@ public long getFileLength() { return fileLength; } - /** - * Set builder value. - * @param value new value - * @return the builder - */ - public OpenFileInformation withS3Select(final boolean value) { - isS3Select = value; - return this; - } - /** * Set builder value. * @param value new value @@ -495,16 +469,6 @@ public OpenFileInformation withStatus(final S3AFileStatus value) { return this; } - /** - * Set builder value. - * @param value new value - * @return the builder - */ - public OpenFileInformation withSql(final String value) { - sql = value; - return this; - } - /** * Set builder value. * @param value new value diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java index 9c88870633a35..5a5d537d7a65d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/OperationCallbacks.java @@ -69,7 +69,7 @@ S3ObjectAttributes createObjectAttributes( * Create the read context for reading from the referenced file, * using FS state as well as the status. * @param fileStatus file status. - * @return a context for read and select operations. + * @return a context for read operations. 
*/ S3AReadOpContext createReadContext( FileStatus fileStatus); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java index 17a7189ae220d..c91324da7cb15 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/RequestFactoryImpl.java @@ -43,7 +43,6 @@ import software.amazon.awssdk.services.s3.model.MetadataDirective; import software.amazon.awssdk.services.s3.model.ObjectIdentifier; import software.amazon.awssdk.services.s3.model.PutObjectRequest; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; import software.amazon.awssdk.services.s3.model.ServerSideEncryption; import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; @@ -585,20 +584,6 @@ public UploadPartRequest.Builder newUploadPartRequestBuilder( return prepareRequest(builder); } - @Override - public SelectObjectContentRequest.Builder newSelectRequestBuilder(String key) { - SelectObjectContentRequest.Builder requestBuilder = - SelectObjectContentRequest.builder().bucket(bucket).key(key); - - EncryptionSecretOperations.getSSECustomerKey(encryptionSecrets).ifPresent(base64customerKey -> { - requestBuilder.sseCustomerAlgorithm(ServerSideEncryption.AES256.name()) - .sseCustomerKey(base64customerKey) - .sseCustomerKeyMD5(Md5Utils.md5AsBase64(Base64.getDecoder().decode(base64customerKey))); - }); - - return prepareRequest(requestBuilder); - } - @Override public ListObjectsRequest.Builder newListObjectsV1RequestBuilder( final String key, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java index 26b6acda30906..51bff4228be0f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/s3guard/S3GuardTool.java @@ -57,7 +57,7 @@ import org.apache.hadoop.fs.s3a.commit.InternalCommitterConstants; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicy; import org.apache.hadoop.fs.s3a.impl.DirectoryPolicyImpl; -import org.apache.hadoop.fs.s3a.select.SelectTool; +import org.apache.hadoop.fs.s3a.select.SelectConstants; import org.apache.hadoop.fs.s3a.tools.BucketTool; import org.apache.hadoop.fs.s3a.tools.MarkerTool; import org.apache.hadoop.fs.shell.CommandFormat; @@ -76,6 +76,7 @@ import static org.apache.hadoop.fs.s3a.commit.CommitConstants.*; import static org.apache.hadoop.fs.s3a.commit.staging.StagingCommitterConstants.FILESYSTEM_TEMP_PATH; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.S3A_DYNAMIC_CAPABILITIES; +import static org.apache.hadoop.fs.s3a.select.SelectConstants.SELECT_UNSUPPORTED; import static org.apache.hadoop.fs.statistics.IOStatisticsLogging.ioStatisticsToPrettyString; import static org.apache.hadoop.fs.statistics.IOStatisticsSupport.retrieveIOStatistics; import static org.apache.hadoop.fs.statistics.StoreStatisticNames.MULTIPART_UPLOAD_ABORTED; @@ -121,7 +122,6 @@ public abstract class S3GuardTool extends Configured implements Tool, "\t" + BucketInfo.NAME + " - " + BucketInfo.PURPOSE + "\n" + "\t" + BucketTool.NAME + " - " + BucketTool.PURPOSE + "\n" + "\t" + MarkerTool.MARKERS + " - " + MarkerTool.PURPOSE + "\n" 
+ - "\t" + SelectTool.NAME + " - " + SelectTool.PURPOSE + "\n" + "\t" + Uploads.NAME + " - " + Uploads.PURPOSE + "\n"; private static final String E_UNSUPPORTED = "This command is no longer supported"; @@ -1004,11 +1004,9 @@ public static int run(Configuration conf, String... args) throws case Uploads.NAME: command = new Uploads(conf); break; - case SelectTool.NAME: - // the select tool is not technically a S3Guard tool, but it's on the CLI - // because this is the defacto S3 CLI. - command = new SelectTool(conf); - break; + case SelectConstants.NAME: + throw new ExitUtil.ExitException( + EXIT_UNSUPPORTED_VERSION, SELECT_UNSUPPORTED); default: printHelp(); throw new ExitUtil.ExitException(E_USAGE, diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java deleted file mode 100644 index 42000f1017259..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/BlockingEnumeration.java +++ /dev/null @@ -1,156 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a.select; - -import java.util.Enumeration; -import java.util.NoSuchElementException; -import java.util.concurrent.BlockingQueue; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.LinkedBlockingQueue; - -import org.reactivestreams.Subscriber; -import org.reactivestreams.Subscription; - -import software.amazon.awssdk.core.async.SdkPublisher; -import software.amazon.awssdk.core.exception.SdkException; - -/** - * Implements the {@link Enumeration} interface by subscribing to a - * {@link SdkPublisher} instance. The enumeration will buffer a fixed - * number of elements and only request new ones from the publisher - * when they are consumed. Calls to {@link #hasMoreElements()} and - * {@link #nextElement()} may block while waiting for new elements. - * @param the type of element. - */ -public final class BlockingEnumeration implements Enumeration { - private static final class Signal { - private final T element; - private final Throwable error; - - Signal(T element) { - this.element = element; - this.error = null; - } - - Signal(Throwable error) { - this.element = null; - this.error = error; - } - } - - private final Signal endSignal = new Signal<>((Throwable)null); - private final CompletableFuture subscription = new CompletableFuture<>(); - private final BlockingQueue> signalQueue; - private final int bufferSize; - private Signal current = null; - - /** - * Create an enumeration with a fixed buffer size and an - * optional injected first element. - * @param publisher the publisher feeding the enumeration. - * @param bufferSize the buffer size. 
- * @param firstElement (optional) first element the enumeration will return. - */ - public BlockingEnumeration(SdkPublisher publisher, - final int bufferSize, - final T firstElement) { - this.signalQueue = new LinkedBlockingQueue<>(); - this.bufferSize = bufferSize; - if (firstElement != null) { - this.current = new Signal<>(firstElement); - } - publisher.subscribe(new EnumerationSubscriber()); - } - - /** - * Create an enumeration with a fixed buffer size. - * @param publisher the publisher feeding the enumeration. - * @param bufferSize the buffer size. - */ - public BlockingEnumeration(SdkPublisher publisher, - final int bufferSize) { - this(publisher, bufferSize, null); - } - - @Override - public boolean hasMoreElements() { - if (current == null) { - try { - current = signalQueue.take(); - } catch (InterruptedException e) { - current = new Signal<>(e); - subscription.thenAccept(Subscription::cancel); - Thread.currentThread().interrupt(); - } - } - if (current.error != null) { - Throwable error = current.error; - current = endSignal; - if (error instanceof Error) { - throw (Error)error; - } else if (error instanceof SdkException) { - throw (SdkException)error; - } else { - throw SdkException.create("Unexpected error", error); - } - } - return current != endSignal; - } - - @Override - public T nextElement() { - if (!hasMoreElements()) { - throw new NoSuchElementException(); - } - T element = current.element; - current = null; - subscription.thenAccept(s -> s.request(1)); - return element; - } - - private final class EnumerationSubscriber implements Subscriber { - - @Override - public void onSubscribe(Subscription s) { - long request = bufferSize; - if (current != null) { - request--; - } - if (request > 0) { - s.request(request); - } - subscription.complete(s); - } - - @Override - public void onNext(T t) { - signalQueue.add(new Signal<>(t)); - } - - @Override - public void onError(Throwable t) { - signalQueue.add(new Signal<>(t)); - } - - @Override - public void onComplete() { - signalQueue.add(endSignal); - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/InternalSelectConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/InternalSelectConstants.java deleted file mode 100644 index fbf5226afb82f..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/InternalSelectConstants.java +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3a.select; - -import java.util.Arrays; -import java.util.Collections; -import java.util.HashSet; -import java.util.Set; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.fs.s3a.impl.InternalConstants; - -import static org.apache.hadoop.fs.s3a.select.SelectConstants.*; - -/** - * Constants for internal use in the org.apache.hadoop.fs.s3a module itself. - * Please don't refer to these outside of this module & its tests. - * If you find you need to then either the code is doing something it - * should not, or these constants need to be uprated to being - * public and stable entries. - */ -@InterfaceAudience.Private -public final class InternalSelectConstants { - - private InternalSelectConstants() { - } - - /** - * An unmodifiable set listing the options - * supported in {@code openFile()}. - */ - public static final Set SELECT_OPTIONS; - - /* - * Build up the options, pulling in the standard set too. - */ - static { - // when adding to this, please keep in alphabetical order after the - // common options and the SQL. - HashSet options = new HashSet<>(Arrays.asList( - SELECT_SQL, - SELECT_ERRORS_INCLUDE_SQL, - SELECT_INPUT_COMPRESSION, - SELECT_INPUT_FORMAT, - SELECT_OUTPUT_FORMAT, - CSV_INPUT_COMMENT_MARKER, - CSV_INPUT_HEADER, - CSV_INPUT_INPUT_FIELD_DELIMITER, - CSV_INPUT_QUOTE_CHARACTER, - CSV_INPUT_QUOTE_ESCAPE_CHARACTER, - CSV_INPUT_RECORD_DELIMITER, - CSV_OUTPUT_FIELD_DELIMITER, - CSV_OUTPUT_QUOTE_CHARACTER, - CSV_OUTPUT_QUOTE_ESCAPE_CHARACTER, - CSV_OUTPUT_QUOTE_FIELDS, - CSV_OUTPUT_RECORD_DELIMITER - )); - options.addAll(InternalConstants.S3A_OPENFILE_KEYS); - SELECT_OPTIONS = Collections.unmodifiableSet(options); - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java deleted file mode 100644 index c3b8abbc2ea88..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectBinding.java +++ /dev/null @@ -1,428 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3a.select; - -import java.io.IOException; -import java.util.Locale; - -import software.amazon.awssdk.services.s3.model.CSVInput; -import software.amazon.awssdk.services.s3.model.CSVOutput; -import software.amazon.awssdk.services.s3.model.ExpressionType; -import software.amazon.awssdk.services.s3.model.InputSerialization; -import software.amazon.awssdk.services.s3.model.OutputSerialization; -import software.amazon.awssdk.services.s3.model.QuoteFields; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; -import org.apache.hadoop.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.commons.lang3.StringUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathIOException; -import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.fs.s3a.S3AReadOpContext; -import org.apache.hadoop.fs.s3a.S3ObjectAttributes; -import org.apache.hadoop.fs.s3a.WriteOperationHelper; - -import static org.apache.hadoop.util.Preconditions.checkNotNull; -import static org.apache.commons.lang3.StringUtils.isNotEmpty; -import static org.apache.hadoop.fs.s3a.select.SelectConstants.*; - -/** - * Class to do the S3 select binding and build a select request from the - * supplied arguments/configuration. - * - * This class is intended to be instantiated by the owning S3AFileSystem - * instance to handle the construction of requests: IO is still done exclusively - * in the filesystem. - * - */ -public class SelectBinding { - - static final Logger LOG = - LoggerFactory.getLogger(SelectBinding.class); - - /** Operations on the store. */ - private final WriteOperationHelper operations; - - /** Is S3 Select enabled? */ - private final boolean enabled; - private final boolean errorsIncludeSql; - - /** - * Constructor. - * @param operations callback to owner FS, with associated span. - */ - public SelectBinding(final WriteOperationHelper operations) { - this.operations = checkNotNull(operations); - Configuration conf = getConf(); - this.enabled = isSelectEnabled(conf); - this.errorsIncludeSql = conf.getBoolean(SELECT_ERRORS_INCLUDE_SQL, false); - } - - Configuration getConf() { - return operations.getConf(); - } - - /** - * Is the service supported? - * @return true iff select is enabled. - */ - public boolean isEnabled() { - return enabled; - } - - /** - * Static probe for select being enabled. - * @param conf configuration - * @return true iff select is enabled. - */ - public static boolean isSelectEnabled(Configuration conf) { - return conf.getBoolean(FS_S3A_SELECT_ENABLED, true); - } - - /** - * Build and execute a select request. - * @param readContext the read context, which includes the source path. - * @param expression the SQL expression. - * @param builderOptions query options - * @param objectAttributes object attributes from a HEAD request - * @return an FSDataInputStream whose wrapped stream is a SelectInputStream - * @throws IllegalArgumentException argument failure - * @throws IOException failure building, validating or executing the request. - * @throws PathIOException source path is a directory. 
- */ - @Retries.RetryTranslated - public FSDataInputStream select( - final S3AReadOpContext readContext, - final String expression, - final Configuration builderOptions, - final S3ObjectAttributes objectAttributes) throws IOException { - - return new FSDataInputStream( - executeSelect(readContext, - objectAttributes, - builderOptions, - buildSelectRequest( - readContext.getPath(), - expression, - builderOptions - ))); - } - - /** - * Build a select request. - * @param path source path. - * @param expression the SQL expression. - * @param builderOptions config to extract other query options from - * @return the request to serve - * @throws IllegalArgumentException argument failure - * @throws IOException problem building/validating the request - */ - public SelectObjectContentRequest buildSelectRequest( - final Path path, - final String expression, - final Configuration builderOptions) - throws IOException { - Preconditions.checkState(isEnabled(), - "S3 Select is not enabled for %s", path); - - SelectObjectContentRequest.Builder request = operations.newSelectRequestBuilder(path); - buildRequest(request, expression, builderOptions); - return request.build(); - } - - /** - * Execute the select request. - * @param readContext read context - * @param objectAttributes object attributes from a HEAD request - * @param builderOptions the options which came in from the openFile builder. - * @param request the built up select request. - * @return a SelectInputStream - * @throws IOException failure - * @throws PathIOException source path is a directory. - */ - @Retries.RetryTranslated - private SelectInputStream executeSelect( - final S3AReadOpContext readContext, - final S3ObjectAttributes objectAttributes, - final Configuration builderOptions, - final SelectObjectContentRequest request) throws IOException { - - Path path = readContext.getPath(); - if (readContext.getDstFileStatus().isDirectory()) { - throw new PathIOException(path.toString(), - "Can't select " + path - + " because it is a directory"); - } - boolean sqlInErrors = builderOptions.getBoolean(SELECT_ERRORS_INCLUDE_SQL, - errorsIncludeSql); - String expression = request.expression(); - final String errorText = sqlInErrors ? expression : "Select"; - if (sqlInErrors) { - LOG.info("Issuing SQL request {}", expression); - } - SelectEventStreamPublisher selectPublisher = operations.select(path, request, errorText); - return new SelectInputStream(readContext, - objectAttributes, selectPublisher); - } - - /** - * Build the select request from the configuration built up - * in {@code S3AFileSystem.openFile(Path)} and the default - * options in the cluster configuration. - * - * Options are picked up in the following order. - *
- * <ol>
- *   <li> Options in {@code openFileOptions}.</li>
- *   <li> Options in the owning filesystem configuration.</li>
- *   <li> The default values in {@link SelectConstants}</li>
- * </ol>
    - * - * @param requestBuilder request to build up - * @param expression SQL expression - * @param builderOptions the options which came in from the openFile builder. - * @throws IllegalArgumentException if an option is somehow invalid. - * @throws IOException if an option is somehow invalid. - */ - void buildRequest( - final SelectObjectContentRequest.Builder requestBuilder, - final String expression, - final Configuration builderOptions) - throws IllegalArgumentException, IOException { - Preconditions.checkArgument(StringUtils.isNotEmpty(expression), - "No expression provided in parameter " + SELECT_SQL); - - final Configuration ownerConf = operations.getConf(); - - String inputFormat = builderOptions.get(SELECT_INPUT_FORMAT, - SELECT_FORMAT_CSV).toLowerCase(Locale.ENGLISH); - Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(inputFormat), - "Unsupported input format %s", inputFormat); - String outputFormat = builderOptions.get(SELECT_OUTPUT_FORMAT, - SELECT_FORMAT_CSV) - .toLowerCase(Locale.ENGLISH); - Preconditions.checkArgument(SELECT_FORMAT_CSV.equals(outputFormat), - "Unsupported output format %s", outputFormat); - - requestBuilder.expressionType(ExpressionType.SQL); - requestBuilder.expression(expandBackslashChars(expression)); - - requestBuilder.inputSerialization( - buildCsvInput(ownerConf, builderOptions)); - requestBuilder.outputSerialization( - buildCSVOutput(ownerConf, builderOptions)); - } - - /** - * Build the CSV input format for a request. - * @param ownerConf FS owner configuration - * @param builderOptions options on the specific request - * @return the input format - * @throws IllegalArgumentException argument failure - * @throws IOException validation failure - */ - public InputSerialization buildCsvInput( - final Configuration ownerConf, - final Configuration builderOptions) - throws IllegalArgumentException, IOException { - - String headerInfo = opt(builderOptions, - ownerConf, - CSV_INPUT_HEADER, - CSV_INPUT_HEADER_OPT_DEFAULT, - true).toUpperCase(Locale.ENGLISH); - String commentMarker = xopt(builderOptions, - ownerConf, - CSV_INPUT_COMMENT_MARKER, - CSV_INPUT_COMMENT_MARKER_DEFAULT); - String fieldDelimiter = xopt(builderOptions, - ownerConf, - CSV_INPUT_INPUT_FIELD_DELIMITER, - CSV_INPUT_FIELD_DELIMITER_DEFAULT); - String recordDelimiter = xopt(builderOptions, - ownerConf, - CSV_INPUT_RECORD_DELIMITER, - CSV_INPUT_RECORD_DELIMITER_DEFAULT); - String quoteCharacter = xopt(builderOptions, - ownerConf, - CSV_INPUT_QUOTE_CHARACTER, - CSV_INPUT_QUOTE_CHARACTER_DEFAULT); - String quoteEscapeCharacter = xopt(builderOptions, - ownerConf, - CSV_INPUT_QUOTE_ESCAPE_CHARACTER, - CSV_INPUT_QUOTE_ESCAPE_CHARACTER_DEFAULT); - - // CSV input - CSVInput.Builder csvBuilder = CSVInput.builder() - .fieldDelimiter(fieldDelimiter) - .recordDelimiter(recordDelimiter) - .comments(commentMarker) - .quoteCharacter(quoteCharacter); - if (StringUtils.isNotEmpty(quoteEscapeCharacter)) { - csvBuilder.quoteEscapeCharacter(quoteEscapeCharacter); - } - csvBuilder.fileHeaderInfo(headerInfo); - - InputSerialization.Builder inputSerialization = - InputSerialization.builder() - .csv(csvBuilder.build()); - String compression = opt(builderOptions, - ownerConf, - SELECT_INPUT_COMPRESSION, - COMPRESSION_OPT_NONE, - true).toUpperCase(Locale.ENGLISH); - if (isNotEmpty(compression)) { - inputSerialization.compressionType(compression); - } - return inputSerialization.build(); - } - - /** - * Build CSV output format for a request. 
- * @param ownerConf FS owner configuration - * @param builderOptions options on the specific request - * @return the output format - * @throws IllegalArgumentException argument failure - * @throws IOException validation failure - */ - public OutputSerialization buildCSVOutput( - final Configuration ownerConf, - final Configuration builderOptions) - throws IllegalArgumentException, IOException { - String fieldDelimiter = xopt(builderOptions, - ownerConf, - CSV_OUTPUT_FIELD_DELIMITER, - CSV_OUTPUT_FIELD_DELIMITER_DEFAULT); - String recordDelimiter = xopt(builderOptions, - ownerConf, - CSV_OUTPUT_RECORD_DELIMITER, - CSV_OUTPUT_RECORD_DELIMITER_DEFAULT); - String quoteCharacter = xopt(builderOptions, - ownerConf, - CSV_OUTPUT_QUOTE_CHARACTER, - CSV_OUTPUT_QUOTE_CHARACTER_DEFAULT); - String quoteEscapeCharacter = xopt(builderOptions, - ownerConf, - CSV_OUTPUT_QUOTE_ESCAPE_CHARACTER, - CSV_OUTPUT_QUOTE_ESCAPE_CHARACTER_DEFAULT); - String quoteFields = xopt(builderOptions, - ownerConf, - CSV_OUTPUT_QUOTE_FIELDS, - CSV_OUTPUT_QUOTE_FIELDS_ALWAYS).toUpperCase(Locale.ENGLISH); - - CSVOutput.Builder csvOutputBuilder = CSVOutput.builder() - .quoteCharacter(quoteCharacter) - .quoteFields(QuoteFields.fromValue(quoteFields)) - .fieldDelimiter(fieldDelimiter) - .recordDelimiter(recordDelimiter); - if (!quoteEscapeCharacter.isEmpty()) { - csvOutputBuilder.quoteEscapeCharacter(quoteEscapeCharacter); - } - - // output is CSV, always - return OutputSerialization.builder() - .csv(csvOutputBuilder.build()) - .build(); - } - - /** - * Stringify the given SelectObjectContentRequest, as its - * toString() operator doesn't. - * @param request request to convert to a string - * @return a string to print. Does not contain secrets. - */ - public static String toString(final SelectObjectContentRequest request) { - StringBuilder sb = new StringBuilder(); - sb.append("SelectObjectContentRequest{") - .append("bucket name=").append(request.bucket()) - .append("; key=").append(request.key()) - .append("; expressionType=").append(request.expressionType()) - .append("; expression=").append(request.expression()); - InputSerialization input = request.inputSerialization(); - if (input != null) { - sb.append("; Input") - .append(input.toString()); - } else { - sb.append("; Input Serialization: none"); - } - OutputSerialization out = request.outputSerialization(); - if (out != null) { - sb.append("; Output") - .append(out.toString()); - } else { - sb.append("; Output Serialization: none"); - } - return sb.append("}").toString(); - } - - /** - * Resolve an option. - * @param builderOptions the options which came in from the openFile builder. - * @param fsConf configuration of the owning FS. - * @param base base option (no s3a: prefix) - * @param defVal default value. Must not be null. - * @param trim should the result be trimmed. - * @return the possibly trimmed value. - */ - static String opt(Configuration builderOptions, - Configuration fsConf, - String base, - String defVal, - boolean trim) { - String r = builderOptions.get(base, fsConf.get(base, defVal)); - return trim ? r.trim() : r; - } - - /** - * Get an option with backslash arguments transformed. - * These are not trimmed, so whitespace is significant. 
- * @param selectOpts options in the select call - * @param fsConf filesystem conf - * @param base base option name - * @param defVal default value - * @return the transformed value - */ - static String xopt(Configuration selectOpts, - Configuration fsConf, - String base, - String defVal) { - return expandBackslashChars( - opt(selectOpts, fsConf, base, defVal, false)); - } - - /** - * Perform escaping. - * @param src source string. - * @return the replaced value - */ - static String expandBackslashChars(String src) { - return src.replace("\\n", "\n") - .replace("\\\"", "\"") - .replace("\\t", "\t") - .replace("\\r", "\r") - .replace("\\\"", "\"") - // backslash substitution must come last - .replace("\\\\", "\\"); - } - - -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectConstants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectConstants.java index 0e2bf914f83c5..d1c977f92824d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectConstants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectConstants.java @@ -25,13 +25,19 @@ * Options related to S3 Select. * * These options are set for the entire filesystem unless overridden - * as an option in the URI + * as an option in the URI. + * + * The S3 Select API is no longer supported -however this class is retained + * so that any application which imports the dependencies will still link. */ @InterfaceAudience.Public -@InterfaceStability.Unstable +@InterfaceStability.Stable +@Deprecated public final class SelectConstants { - public static final String SELECT_UNSUPPORTED = "S3 Select is not supported"; + public static final String SELECT_UNSUPPORTED = "S3 Select is no longer supported"; + + public static final String NAME = "select"; private SelectConstants() { } @@ -41,13 +47,18 @@ private SelectConstants() { /** * This is the big SQL expression: {@value}. - * When used in an open() call, switch to a select operation. - * This is only used in the open call, never in a filesystem configuration. + * When used in an open() call: + *
+ * <ol>
+ *   <li>if the option is set in a {@code .may()} clause: warn and continue</li>
+ *   <li>if the option is set in a {@code .must()} clause:
+ *   {@code UnsupportedOperationException}.</li>
+ * </ol>
    */ public static final String SELECT_SQL = FS_S3A_SELECT + "sql"; /** * Does the FS Support S3 Select? + * This is false everywhere. * Value: {@value}. */ public static final String S3_SELECT_CAPABILITY = "fs.s3a.capability.select.sql"; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java deleted file mode 100644 index c71ea5f1623a1..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectEventStreamPublisher.java +++ /dev/null @@ -1,124 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a.select; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.SequenceInputStream; -import java.util.concurrent.CompletableFuture; -import java.util.function.Consumer; - -import org.reactivestreams.Subscriber; - -import software.amazon.awssdk.core.async.SdkPublisher; -import software.amazon.awssdk.http.AbortableInputStream; -import software.amazon.awssdk.services.s3.model.EndEvent; -import software.amazon.awssdk.services.s3.model.RecordsEvent; -import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; -import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; -import software.amazon.awssdk.utils.ToString; - -/** - * Async publisher of {@link SelectObjectContentEventStream}s returned - * from a SelectObjectContent call. - */ -public final class SelectEventStreamPublisher implements - SdkPublisher { - - private final CompletableFuture selectOperationFuture; - private final SelectObjectContentResponse response; - private final SdkPublisher publisher; - - /** - * Create the publisher. - * @param selectOperationFuture SelectObjectContent future - * @param response SelectObjectContent response - * @param publisher SelectObjectContentEventStream publisher to wrap - */ - public SelectEventStreamPublisher( - CompletableFuture selectOperationFuture, - SelectObjectContentResponse response, - SdkPublisher publisher) { - this.selectOperationFuture = selectOperationFuture; - this.response = response; - this.publisher = publisher; - } - - /** - * Retrieve an input stream to the subset of the S3 object that matched the select query. - * This is equivalent to loading the content of all RecordsEvents into an InputStream. - * This will lazily-load the content from S3, minimizing the amount of memory used. 
- * @param onEndEvent callback on the end event - * @return the input stream - */ - public AbortableInputStream toRecordsInputStream(Consumer onEndEvent) { - SdkPublisher recordInputStreams = this.publisher - .filter(e -> { - if (e instanceof RecordsEvent) { - return true; - } else if (e instanceof EndEvent) { - onEndEvent.accept((EndEvent) e); - } - return false; - }) - .map(e -> ((RecordsEvent) e).payload().asInputStream()); - - // Subscribe to the async publisher using an enumeration that will - // buffer a single chunk (RecordsEvent's payload) at a time and - // block until it is consumed. - // Also inject an empty stream as the first element that - // SequenceInputStream will request on construction. - BlockingEnumeration enumeration = - new BlockingEnumeration(recordInputStreams, 1, EMPTY_STREAM); - return AbortableInputStream.create( - new SequenceInputStream(enumeration), - this::cancel); - } - - /** - * The response from the SelectObjectContent call. - * @return the response object - */ - public SelectObjectContentResponse response() { - return response; - } - - @Override - public void subscribe(Subscriber subscriber) { - publisher.subscribe(subscriber); - } - - /** - * Cancel the operation. - */ - public void cancel() { - selectOperationFuture.cancel(true); - } - - @Override - public String toString() { - return ToString.builder("SelectObjectContentEventStream") - .add("response", response) - .add("publisher", publisher) - .build(); - } - - private static final InputStream EMPTY_STREAM = - new ByteArrayInputStream(new byte[0]); -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java deleted file mode 100644 index 3586d83a0a434..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectInputStream.java +++ /dev/null @@ -1,455 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3a.select; - -import java.io.EOFException; -import java.io.IOException; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicLong; - -import software.amazon.awssdk.core.exception.AbortedException; -import software.amazon.awssdk.http.AbortableInputStream; -import org.apache.hadoop.util.Preconditions; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.fs.CanSetReadahead; -import org.apache.hadoop.fs.FSExceptionMessages; -import org.apache.hadoop.fs.FSInputStream; -import org.apache.hadoop.fs.PathIOException; -import org.apache.hadoop.fs.s3a.Retries; -import org.apache.hadoop.fs.s3a.S3AReadOpContext; -import org.apache.hadoop.fs.s3a.S3ObjectAttributes; -import org.apache.hadoop.fs.s3a.statistics.S3AInputStreamStatistics; -import org.apache.hadoop.io.IOUtils; - - -import static org.apache.hadoop.util.Preconditions.checkNotNull; -import static org.apache.commons.lang3.StringUtils.isNotEmpty; -import static org.apache.hadoop.fs.s3a.Invoker.once; -import static org.apache.hadoop.fs.s3a.S3AInputStream.validateReadahead; - -/** - * An input stream for S3 Select return values. - * This is simply an end-to-end GET request, without any - * form of seek or recovery from connectivity failures. - * - * Currently only seek and positioned read operations on the current - * location are supported. - * - * The normal S3 input counters are updated by this stream. - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class SelectInputStream extends FSInputStream implements - CanSetReadahead { - - private static final Logger LOG = - LoggerFactory.getLogger(SelectInputStream.class); - - public static final String SEEK_UNSUPPORTED = "seek()"; - - /** - * Same set of arguments as for an S3AInputStream. - */ - private final S3ObjectAttributes objectAttributes; - - /** - * Tracks the current position. - */ - private AtomicLong pos = new AtomicLong(0); - - /** - * Closed flag. - */ - private final AtomicBoolean closed = new AtomicBoolean(false); - - /** - * Did the read complete successfully? - */ - private final AtomicBoolean completedSuccessfully = new AtomicBoolean(false); - - /** - * Abortable response stream. - * This is guaranteed to never be null. - */ - private final AbortableInputStream wrappedStream; - - private final String bucket; - - private final String key; - - private final String uri; - - private final S3AReadOpContext readContext; - - private final S3AInputStreamStatistics streamStatistics; - - private long readahead; - - /** - * Create the stream. - * The read attempt is initiated immediately. 
- * @param readContext read context - * @param objectAttributes object attributes from a HEAD request - * @param selectPublisher event stream publisher from the already executed call - * @throws IOException failure - */ - @Retries.OnceTranslated - public SelectInputStream( - final S3AReadOpContext readContext, - final S3ObjectAttributes objectAttributes, - final SelectEventStreamPublisher selectPublisher) throws IOException { - Preconditions.checkArgument(isNotEmpty(objectAttributes.getBucket()), - "No Bucket"); - Preconditions.checkArgument(isNotEmpty(objectAttributes.getKey()), - "No Key"); - this.objectAttributes = objectAttributes; - this.bucket = objectAttributes.getBucket(); - this.key = objectAttributes.getKey(); - this.uri = "s3a://" + this.bucket + "/" + this.key; - this.readContext = readContext; - this.readahead = readContext.getReadahead(); - this.streamStatistics = readContext.getS3AStatisticsContext() - .newInputStreamStatistics(); - - AbortableInputStream stream = once( - "S3 Select", - uri, - () -> { - return selectPublisher.toRecordsInputStream(e -> { - LOG.debug("Completed successful S3 select read from {}", uri); - completedSuccessfully.set(true); - }); - }); - - this.wrappedStream = checkNotNull(stream); - // this stream is already opened, so mark as such in the statistics. - streamStatistics.streamOpened(); - } - - @Override - public void close() throws IOException { - long skipped = 0; - boolean aborted = false; - if (!closed.getAndSet(true)) { - try { - // set up for aborts. - // if we know the available amount > readahead. Abort. - // - boolean shouldAbort = wrappedStream.available() > readahead; - if (!shouldAbort) { - // read our readahead range worth of data - skipped = wrappedStream.skip(readahead); - shouldAbort = wrappedStream.read() >= 0; - } - // now, either there is data left or not. - if (shouldAbort) { - // yes, more data. Abort and add this fact to the stream stats - aborted = true; - wrappedStream.abort(); - } - } catch (IOException | AbortedException e) { - LOG.debug("While closing stream", e); - } finally { - IOUtils.cleanupWithLogger(LOG, wrappedStream); - streamStatistics.streamClose(aborted, skipped); - streamStatistics.close(); - super.close(); - } - } - } - - /** - * Verify that the input stream is open. Non blocking; this gives - * the last state of the atomic {@link #closed} field. - * @throws PathIOException if the connection is closed. - */ - private void checkNotClosed() throws IOException { - if (closed.get()) { - throw new PathIOException(uri, FSExceptionMessages.STREAM_IS_CLOSED); - } - } - - @Override - public int available() throws IOException { - checkNotClosed(); - return wrappedStream.available(); - } - - @Override - @Retries.OnceTranslated - public synchronized long skip(final long n) throws IOException { - checkNotClosed(); - long skipped = once("skip", uri, () -> wrappedStream.skip(n)); - pos.addAndGet(skipped); - // treat as a forward skip for stats - streamStatistics.seekForwards(skipped, skipped); - return skipped; - } - - @Override - public long getPos() { - return pos.get(); - } - - /** - * Set the readahead. - * @param readahead The readahead to use. null means to use the default. - */ - @Override - public void setReadahead(Long readahead) { - this.readahead = validateReadahead(readahead); - } - - /** - * Get the current readahead value. - * @return the readahead - */ - public long getReadahead() { - return readahead; - } - - /** - * Read a byte. 
There's no attempt to recover, but AWS-SDK exceptions - * such as {@code SelectObjectContentEventException} are translated into - * IOExceptions. - * @return a byte read or -1 for an end of file. - * @throws IOException failure. - */ - @Override - @Retries.OnceTranslated - public synchronized int read() throws IOException { - checkNotClosed(); - int byteRead; - try { - byteRead = once("read()", uri, () -> wrappedStream.read()); - } catch (EOFException e) { - // this could be one of: end of file, some IO failure - if (completedSuccessfully.get()) { - // read was successful - return -1; - } else { - // the stream closed prematurely - LOG.info("Reading of S3 Select data from {} failed before all results " - + " were generated.", uri); - streamStatistics.readException(); - throw new PathIOException(uri, - "Read of S3 Select data did not complete"); - } - } - - if (byteRead >= 0) { - incrementBytesRead(1); - } - return byteRead; - } - - @SuppressWarnings("NullableProblems") - @Override - @Retries.OnceTranslated - public synchronized int read(final byte[] buf, final int off, final int len) - throws IOException { - checkNotClosed(); - validatePositionedReadArgs(pos.get(), buf, off, len); - if (len == 0) { - return 0; - } - - int bytesRead; - try { - streamStatistics.readOperationStarted(pos.get(), len); - bytesRead = wrappedStream.read(buf, off, len); - } catch (EOFException e) { - streamStatistics.readException(); - // the base implementation swallows EOFs. - return -1; - } - - incrementBytesRead(bytesRead); - streamStatistics.readOperationCompleted(len, bytesRead); - return bytesRead; - } - - /** - * Forward seeks are supported, but not backwards ones. - * Forward seeks are implemented using read, so - * means that long-distance seeks will be (literally) expensive. - * - * @param newPos new seek position. - * @throws PathIOException Backwards seek attempted. - * @throws EOFException attempt to seek past the end of the stream. - * @throws IOException IO failure while skipping bytes - */ - @Override - @Retries.OnceTranslated - public synchronized void seek(long newPos) throws IOException { - long current = getPos(); - long distance = newPos - current; - if (distance < 0) { - throw unsupported(SEEK_UNSUPPORTED - + " backwards from " + current + " to " + newPos); - } - if (distance == 0) { - LOG.debug("ignoring seek to current position."); - } else { - // the complicated one: Forward seeking. Useful for split files. - LOG.debug("Forward seek by reading {} bytes", distance); - long bytesSkipped = 0; - // read byte-by-byte, hoping that buffering will compensate for this. - // doing it this way ensures that the seek stops at exactly the right - // place. skip(len) can return a smaller value, at which point - // it's not clear what to do. - while(distance > 0) { - int r = read(); - if (r == -1) { - // reached an EOF too early - throw new EOFException("Seek to " + newPos - + " reached End of File at offset " + getPos()); - } - distance--; - bytesSkipped++; - } - // read has finished. - streamStatistics.seekForwards(bytesSkipped, bytesSkipped); - } - } - - /** - * Build an exception to raise when an operation is not supported here. - * @param action action which is Unsupported. - * @return an exception to throw. 
- */ - protected PathIOException unsupported(final String action) { - return new PathIOException( - String.format("s3a://%s/%s", bucket, key), - action + " not supported"); - } - - @Override - public boolean seekToNewSource(long targetPos) throws IOException { - return false; - } - - // Not supported. - @Override - public boolean markSupported() { - return false; - } - - @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod") - @Override - public void mark(int readLimit) { - // Do nothing - } - - @SuppressWarnings("NonSynchronizedMethodOverridesSynchronizedMethod") - @Override - public void reset() throws IOException { - throw unsupported("Mark"); - } - - /** - * Aborts the IO. - */ - public void abort() { - if (!closed.get()) { - LOG.debug("Aborting"); - wrappedStream.abort(); - } - } - - /** - * Read at a specific position. - * Reads at a position earlier than the current {@link #getPos()} position - * will fail with a {@link PathIOException}. See {@link #seek(long)}. - * Unlike the base implementation And the requirements of the filesystem - * specification, this updates the stream position as returned in - * {@link #getPos()}. - * @param position offset in the stream. - * @param buffer buffer to read in to. - * @param offset offset within the buffer - * @param length amount of data to read. - * @return the result. - * @throws PathIOException Backwards seek attempted. - * @throws EOFException attempt to seek past the end of the stream. - * @throws IOException IO failure while seeking in the stream or reading data. - */ - @Override - public int read(final long position, - final byte[] buffer, - final int offset, - final int length) - throws IOException { - // maybe seek forwards to the position. - seek(position); - return read(buffer, offset, length); - } - - /** - * Increment the bytes read counter if there is a stats instance - * and the number of bytes read is more than zero. - * This also updates the {@link #pos} marker by the same value. - * @param bytesRead number of bytes read - */ - private void incrementBytesRead(long bytesRead) { - if (bytesRead > 0) { - pos.addAndGet(bytesRead); - } - streamStatistics.bytesRead(bytesRead); - if (readContext.getStats() != null && bytesRead > 0) { - readContext.getStats().incrementBytesRead(bytesRead); - } - } - - /** - * Get the Stream statistics. - * @return the statistics for this stream. - */ - @InterfaceAudience.Private - @InterfaceStability.Unstable - public S3AInputStreamStatistics getS3AStreamStatistics() { - return streamStatistics; - } - - /** - * String value includes statistics as well as stream state. - * Important: there are no guarantees as to the stability - * of this value. - * @return a string value for printing in logs/diagnostics - */ - @Override - @InterfaceStability.Unstable - public String toString() { - String s = streamStatistics.toString(); - synchronized (this) { - final StringBuilder sb = new StringBuilder( - "SelectInputStream{"); - sb.append(uri); - sb.append("; state ").append(!closed.get() ? 
"open" : "closed"); - sb.append("; pos=").append(getPos()); - sb.append("; readahead=").append(readahead); - sb.append('\n').append(s); - sb.append('}'); - return sb.toString(); - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java deleted file mode 100644 index 8233e67eea0a5..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectObjectContentHelper.java +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a.select; - -import java.io.IOException; -import java.util.concurrent.CompletableFuture; -import java.util.concurrent.CompletionException; - -import software.amazon.awssdk.core.async.SdkPublisher; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.services.s3.model.SelectObjectContentEventStream; -import software.amazon.awssdk.services.s3.model.SelectObjectContentRequest; -import software.amazon.awssdk.services.s3.model.SelectObjectContentResponse; -import software.amazon.awssdk.services.s3.model.SelectObjectContentResponseHandler; - -import org.apache.commons.lang3.tuple.Pair; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3a.S3AUtils; - -import static org.apache.hadoop.fs.s3a.WriteOperationHelper.WriteOperationHelperCallbacks; - -/** - * Helper for SelectObjectContent queries against an S3 Bucket. - */ -public final class SelectObjectContentHelper { - - private SelectObjectContentHelper() { - } - - /** - * Execute an S3 Select operation. - * @param writeOperationHelperCallbacks helper callbacks - * @param source source for selection - * @param request Select request to issue. 
- * @param action the action for use in exception creation - * @return the select response event stream publisher - * @throws IOException on failure - */ - public static SelectEventStreamPublisher select( - WriteOperationHelperCallbacks writeOperationHelperCallbacks, - Path source, - SelectObjectContentRequest request, - String action) - throws IOException { - try { - Handler handler = new Handler(); - CompletableFuture selectOperationFuture = - writeOperationHelperCallbacks.selectObjectContent(request, handler); - return handler.eventPublisher(selectOperationFuture).join(); - } catch (Throwable e) { - if (e instanceof CompletionException) { - e = e.getCause(); - } - IOException translated; - if (e instanceof SdkException) { - translated = S3AUtils.translateException(action, source, - (SdkException)e); - } else { - translated = new IOException(e); - } - throw translated; - } - } - - private static class Handler implements SelectObjectContentResponseHandler { - private volatile CompletableFuture>> responseAndPublisherFuture = - new CompletableFuture<>(); - - private volatile SelectObjectContentResponse response; - - public CompletableFuture eventPublisher( - CompletableFuture selectOperationFuture) { - return responseAndPublisherFuture.thenApply(p -> - new SelectEventStreamPublisher(selectOperationFuture, - p.getLeft(), p.getRight())); - } - - @Override - public void responseReceived(SelectObjectContentResponse selectObjectContentResponse) { - this.response = selectObjectContentResponse; - } - - @Override - public void onEventStream(SdkPublisher publisher) { - responseAndPublisherFuture.complete(Pair.of(response, publisher)); - } - - @Override - public void exceptionOccurred(Throwable error) { - responseAndPublisherFuture.completeExceptionally(error); - } - - @Override - public void complete() { - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectTool.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectTool.java deleted file mode 100644 index 7a6c1afdc1fc3..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/select/SelectTool.java +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.hadoop.fs.s3a.select; - -import java.io.BufferedReader; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.InputStreamReader; -import java.io.OutputStream; -import java.io.PrintStream; -import java.nio.charset.StandardCharsets; -import java.util.List; -import java.util.Locale; -import java.util.Optional; -import java.util.Scanner; - -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import org.apache.commons.io.IOUtils; -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FSDataInputStream; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.FutureDataInputStreamBuilder; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; -import org.apache.hadoop.fs.shell.CommandFormat; -import org.apache.hadoop.util.DurationInfo; -import org.apache.hadoop.util.ExitUtil; -import org.apache.hadoop.util.OperationDuration; -import org.apache.hadoop.util.functional.FutureIO; - -import static org.apache.commons.lang3.StringUtils.isNotEmpty; -import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; -import static org.apache.hadoop.service.launcher.LauncherExitCodes.*; -import static org.apache.hadoop.fs.s3a.select.SelectConstants.*; - -/** - * This is a CLI tool for the select operation, which is available - * through the S3Guard command. - * - * Usage: - *
- * <pre>
- *   hadoop s3guard select [options] Path Statement
- * </pre>
    - */ -public class SelectTool extends S3GuardTool { - - private static final Logger LOG = - LoggerFactory.getLogger(SelectTool.class); - - public static final String NAME = "select"; - - public static final String PURPOSE = "make an S3 Select call"; - - private static final String USAGE = NAME - + " [OPTIONS]" - + " [-limit rows]" - + " [-header (use|none|ignore)]" - + " [-out path]" - + " [-expected rows]" - + " [-compression (gzip|bzip2|none)]" - + " [-inputformat csv]" - + " [-outputformat csv]" - + " -``` - -The output is printed, followed by some summary statistics, unless the `-out` -option is used to declare a destination file. In this mode -status will be logged to the console, but the output of the query will be -saved directly to the output file. - -### Example 1 - -Read the first 100 rows of the landsat dataset where cloud cover is zero: - -```bash -hadoop s3guard select -header use -compression gzip -limit 100 \ - s3a://landsat-pds/scene_list.gz \ - "SELECT * FROM S3OBJECT s WHERE s.cloudCover = '0.0'" -``` - -### Example 2 - -Return the `entityId` column for all rows in the dataset where the cloud -cover was "0.0", and save it to the file `output.csv`: - -```bash -hadoop s3guard select -header use -out s3a://mybucket/output.csv \ - -compression gzip \ - s3a://landsat-pds/scene_list.gz \ - "SELECT s.entityId from S3OBJECT s WHERE s.cloudCover = '0.0'" -``` - -This file will: - -1. Be UTF-8 encoded. -1. Have quotes on all columns returned. -1. Use commas as a separator. -1. Not have any header. - -The output can be saved to a file with the `-out` option. Note also that -`-D key=value` settings can be used to control the operation, if placed after -the `s3guard` command and before `select` - - -```bash -hadoop s3guard \ - -D s.s3a.select.output.csv.quote.fields=asneeded \ - select \ - -header use \ - -compression gzip \ - -limit 500 \ - -inputformat csv \ - -outputformat csv \ - -out s3a://hwdev-steve-new/output.csv \ - s3a://landsat-pds/scene_list.gz \ - "SELECT s.entityId from S3OBJECT s WHERE s.cloudCover = '0.0'" -``` - - -## Use in MR/Analytics queries: Partially Supported - -S3 Select support in analytics queries is only partially supported. -It does not work reliably with large source files where the work is split up, -and as the various query engines all assume that .csv and .json formats are splittable, -things go very wrong, fast. - -As a proof of concept *only*, S3 Select queries can be made through -MapReduce jobs which use any Hadoop `RecordReader` -class which uses the new `openFile()` API. - -Currently this consists of the following MRv2 readers. - -``` -org.apache.hadoop.mapreduce.lib.input.LineRecordReader -org.apache.hadoop.mapreduce.lib.input.FixedLengthRecordReader -``` - -And a limited number of the MRv1 record readers: - -``` -org.apache.hadoop.mapred.LineRecordReader -``` - -All of these readers use the new API and can be have its optional/mandatory -options set via the `JobConf` used when creating/configuring the reader. - -These readers are instantiated within input formats; the following -formats therefore support S3 Select. 
- -``` -org.apache.hadoop.mapreduce.lib.input.FixedLengthInputFormat -org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat -org.apache.hadoop.mapreduce.lib.input.NLineInputFormat -org.apache.hadoop.mapreduce.lib.input.TextInputFormat -org.apache.hadoop.mapred.KeyValueTextInputFormat -org.apache.hadoop.mapred.TextInputFormat -org.apache.hadoop.mapred.lib.NLineInputFormat -``` - -All `JobConf` options which begin with the prefix `mapreduce.job.input.file.option.` -will have that prefix stripped and the remainder used as the name for an option -when opening the file. - -All `JobConf` options which being with the prefix `mapreduce.job.input.file.must.` -will be converted into mandatory options. - -To use an S3 Select call, set the following options - -``` -mapreduce.job.input.file.must.fs.s3a.select.sql = -mapreduce.job.input.file.must.fs.s3a.select.input.format = CSV -mapreduce.job.input.file.must.fs.s3a.select.output.format = CSV -``` - -Further options may be set to tune the behaviour, for example: - -```java -jobConf.set("mapreduce.job.input.file.must.fs.s3a.select.input.csv.header", "use"); -``` - -*Note* How to tell if a reader has migrated to the new `openFile()` builder -API: - -Set a mandatory option which is not known; if the job does not fail then -an old reader is being used. - -```java -jobConf.set("mapreduce.job.input.file.must.unknown.option", "anything"); -``` - - -### Querying Compressed objects - -S3 Select queries can be made against gzipped source files; the S3A input -stream receives the output in text format, rather than as a (re)compressed -stream. - -To read a gzip file, set `fs.s3a.select.input.compression` to `gzip`. - -```java -jobConf.set("mapreduce.job.input.file.must.fs.s3a.select.input.compression", - "gzip"); -``` - - -Most of the Hadoop RecordReader classes automatically choose a decompressor -based on the extension of the source file. This causes problems when -reading `.gz` files, because S3 Select is automatically decompressing and -returning csv-formatted text. - -By default, a query across gzipped files will fail with the error -"IOException: not a gzip file" - -To avoid this problem, declare that the job should switch to the -"Passthrough Codec" for all files with a ".gz" extension: - -```java -jobConf.set("io.compression.codecs", - "org.apache.hadoop.io.compress.PassthroughCodec"); -jobConf.set("io.compress.passthrough.extension", ".gz"); -``` - -Obviously, this breaks normal `.gz` decompression: only set it on S3 Select -jobs. - -## S3 Select configuration options. - -Consult the javadocs for `org.apache.hadoop.fs.s3a.select.SelectConstants`. - -The listed options can be set in `core-site.xml`, supported by S3A per-bucket -configuration, and can be set programmatically on the `Configuration` object -use to configure a new filesystem instance. - -Any of these options can be set in the builder returned by the `openFile()` call -—simply set them through a chain of `builder.must()` operations. - -```xml - - fs.s3a.select.input.format - csv - Input format - - - - fs.s3a.select.output.format - csv - Output format - - - - fs.s3a.select.input.csv.comment.marker - # - In S3 Select queries: the marker for comment lines in CSV files - - - - fs.s3a.select.input.csv.record.delimiter - \n - In S3 Select queries over CSV files: the record delimiter. - \t is remapped to the TAB character, \r to CR \n to newline. \\ to \ - and \" to " - - - - - fs.s3a.select.input.csv.field.delimiter - , - In S3 Select queries over CSV files: the field delimiter. 
- \t is remapped to the TAB character, \r to CR \n to newline. \\ to \ - and \" to " - - - - - fs.s3a.select.input.csv.quote.character - " - In S3 Select queries over CSV files: quote character. - \t is remapped to the TAB character, \r to CR \n to newline. \\ to \ - and \" to " - - - - - fs.s3a.select.input.csv.quote.escape.character - \\ - In S3 Select queries over CSV files: quote escape character. - \t is remapped to the TAB character, \r to CR \n to newline. \\ to \ - and \" to " - - - - - fs.s3a.select.input.csv.header - none - In S3 Select queries over CSV files: what is the role of the header? One of "none", "ignore" and "use" - - - - fs.s3a.select.input.compression - none - In S3 Select queries, the source compression - algorithm. One of: "none" and "gzip" - - - - fs.s3a.select.output.csv.quote.fields - always - - In S3 Select queries: should fields in generated CSV Files be quoted? - One of: "always", "asneeded". - - - - - fs.s3a.select.output.csv.quote.character - " - - In S3 Select queries: the quote character for generated CSV Files. - - - - - fs.s3a.select.output.csv.quote.escape.character - \\ - - In S3 Select queries: the quote escape character for generated CSV Files. - - - - - fs.s3a.select.output.csv.record.delimiter - \n - - In S3 Select queries: the record delimiter for generated CSV Files. - - - - - fs.s3a.select.output.csv.field.delimiter - , - - In S3 Select queries: the field delimiter for generated CSV Files. - - - - - fs.s3a.select.errors.include.sql - false - - Include the SQL statement in errors: this is useful for development but - may leak security and Personally Identifying Information in production, - so must be disabled there. - - -``` - -## Security and Privacy - -SQL Injection attacks are the classic attack on data. -Because S3 Select is a read-only API, the classic ["Bobby Tables"](https://xkcd.com/327/) -attack to gain write access isn't going to work. Even so: sanitize your inputs. - -CSV does have security issues of its own, specifically: - -*Excel and other spreadsheets may interpret some fields beginning with special -characters as formula, and execute them* - -S3 Select does not appear vulnerable to this, but in workflows where untrusted -data eventually ends up in a spreadsheet (including Google Document spreadsheets), -the data should be sanitized/audited first. There is no support for -such sanitization in S3 Select or in the S3A connector. - -Logging Select statements may expose secrets if they are in the statement. -Even if they are just logged, this may potentially leak Personally Identifying -Information as covered in the EU GDPR legislation and equivalents. - -For both privacy and security reasons, SQL statements are not included -in exception strings by default, nor logged at INFO level. - -To enable them, set `fs.s3a.select.errors.include.sql` to `true`, either in the -site/application configuration, or as an option in the builder for a -single request. When set, the request will also be logged at -the INFO level of the log `org.apache.hadoop.fs.s3a.select.SelectBinding`. - -Personal Identifiable Information is not printed in the AWS S3 logs. -Those logs contain only the SQL keywords from the query planner. -All column names and literals are masked. Following is a sample log example: - -*Query:* - -```sql -SELECT * FROM S3OBJECT s; -``` - -*Log:* - -```sql -select (project (list (project_all))) (from (as str0 (id str1 case_insensitive))) -``` - -Note also that: - -1. 
Debug-level Hadoop logs for the module `org.apache.hadoop.fs.s3a` and other
-components' debug logs may also log the SQL statements (e.g. aws-sdk HTTP logs).
-
-The best practice here is: only enable SQL in exceptions while developing
-SQL queries, especially in an application/notebook where the exception
-text is far easier to see than the application logs.
-
-In production: don't log or report. If you do, all logs and output must be
-considered sensitive from security and privacy perspectives.
-
-The `hadoop s3guard select` command does enable this logging, so it
-can be used as an initial place to experiment with the SQL syntax.
-Rationale: if you are constructing SQL queries on the command line,
-your shell history is already tainted with the query.
-
-### Links
-
-* [CVE-2014-3524](https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2014-3524).
-* [The Absurdly Underestimated Dangers of CSV Injection](http://georgemauer.net/2017/10/07/csv-injection.html).
-* [Comma Separated Vulnerabilities](https://www.contextis.com/blog/comma-separated-vulnerabilities).
-
-### SQL Syntax
-
-The SQL syntax directly supported by the AWS S3 Select API is [documented by
-Amazon](https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-glacier-select-sql-reference.html).
-
-* Use single quotes for all constants, not double quotes.
-* All CSV column values are strings unless cast to a type.
-* Simple `SELECT` calls only; no `JOIN`.
-
-### CSV formats
-
-"CSV" is less a single format than a loose term for line-by-line text data
-in some nonstandard layout; there are even "multiline CSV files".
-
-S3 Select only supports a subset of the loose "CSV" concept, as covered in
-the AWS documentation. There are also limits on how many columns and how
-large a single line may be.
-
-The quote character, field and record delimiters, comment marker and escape
-character can all be configured in the Hadoop configuration.
-
-### Consistency, Concurrency and Error handling
-
-**Consistency**
-
-Since November 2020, AWS S3 has been fully consistent.
-This also applies to S3 Select.
-We do not know what happens if an object is overwritten while a query is active.
-
-
-**Concurrency**
-
-The outcome when a source file is overwritten while the results of
-a select call are still being read is undefined.
-
-The input stream returned by the operation is *NOT THREAD SAFE*.
-
-**Error Handling**
-
-If an attempt to issue an S3 select call fails, the S3A connector will
-reissue the request if-and-only-if it believes a retry may succeed.
-That is: it considers the operation to be idempotent, and retries only when
-the failure is a recoverable connectivity problem or a server-side rejection
-which can be retried (500, 503).
-
-If an attempt to read data from an S3 select stream (`org.apache.hadoop.fs.s3a.select.SelectInputStream`) fails partway through the read, *no attempt is made to retry the operation*.
-
-In contrast, the normal S3A input stream tries to recover from (possibly transient)
-failures by attempting to reopen the file.
-
-
-## Performance
-
-The select operation is most efficient when the query returns the least
-amount of data, as this reduces the amount of data downloaded.
-
-* Limit the number of columns projected to only those needed.
-* Use `LIMIT` to set an upper limit on the rows read, rather than implementing
-a row counter in application code and closing the stream when reached.
-This avoids having to abort the HTTPS connection and negotiate a new one
-on the next S3 request (see the sketch below).
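
As a minimal sketch of the `LIMIT` advice above: the bucket, object and column
names here are hypothetical, and the option names are the ones listed in the
configuration section earlier. The query pushes both the projection and the row
limit into the SQL statement through the `openFile()` builder, rather than
counting rows in application code and aborting the stream early.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SelectWithLimit {
  public static void main(String[] args) throws Exception {
    // Hypothetical object, used purely for illustration.
    Path csv = new Path("s3a://example-bucket/data/records.csv");
    FileSystem fs = csv.getFileSystem(new Configuration());

    // Project only the needed columns and cap the row count in the query itself.
    try (FSDataInputStream in = fs.openFile(csv)
        .must("fs.s3a.select.sql",
            "SELECT s.\"id\", s.\"name\" FROM S3OBJECT s LIMIT 100")
        .must("fs.s3a.select.input.format", "csv")
        .must("fs.s3a.select.output.format", "csv")
        .must("fs.s3a.select.input.csv.header", "use")
        .build()
        .get()) {
      // The stream yields only the selected columns of (at most) 100 rows.
      byte[] buffer = new byte[8192];
      int bytesRead;
      while ((bytesRead = in.read(buffer)) > 0) {
        System.out.write(buffer, 0, bytesRead);
      }
      System.out.flush();
    }
  }
}
```
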
-
-The select call itself can be slow, especially when the source is a multi-MB
-compressed file with aggressive filtering in the `WHERE` clause.
-Assumption: the select query starts at row 1 and scans through each row,
-and does not return data until it has matched one or more rows.
-
-If you can take advantage of the asynchronous nature of the
-`openFile().build().get()` sequence by performing other work before, or in
-parallel with, the `get()` call: do so.
-
-## Troubleshooting
-
-### `NoClassDefFoundError: software/amazon/eventstream/MessageDecoder`
-
-Select operation failing with a missing eventstream class.
-
+// fails
+openFile("s3a://bucket/path")
+  .must("fs.s3a.select.sql", "SELECT ...")
+  .get();
 ```
-java.io.IOException: java.lang.NoClassDefFoundError: software/amazon/eventstream/MessageDecoder
-at org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper.select(SelectObjectContentHelper.java:75)
-at org.apache.hadoop.fs.s3a.WriteOperationHelper.lambda$select$10(WriteOperationHelper.java:660)
-at org.apache.hadoop.fs.store.audit.AuditingFunctions.lambda$withinAuditSpan$0(AuditingFunctions.java:62)
-at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:122)
-```
-
-The eventstream JAR is not on the classpath, or is out of sync with the version
-of the full "bundle.jar" SDK.
-
-Fix: get a compatible version of the JAR on the classpath.
-
-### SQL errors
-
-Getting S3 Select code to work is hard, though those knowledgeable in SQL
-will find it easier.
-
-Problems can be split into:
-
-1. Basic configuration of the client to issue the query.
-1. Bad SQL select syntax and grammar.
-1. Datatype casting issues.
-1. Bad records/data in source files.
-1. Failure to configure MR jobs to work correctly.
-
-The exceptions here are all based on experience gained while writing tests;
-more may surface with broader use.
-
-All failures other than network errors on request initialization are considered
-unrecoverable and will not be reattempted.
-
-As parse-time errors always state the line and column of an error, you can
-simplify debugging by breaking a SQL statement across lines, e.g.
+Any `openFile()` call to an S3A Path where a SQL query is passed in as a `may()`
+clause SHALL be logged at WARN level the first time it is invoked; the option is then ignored.
 
 ```java
-String sql = "SELECT\n"
-    + "s.entityId \n"
-    + "FROM " + "S3OBJECT s WHERE\n"
-    + "s.\"cloudCover\" = '100.0'\n"
-    + " LIMIT 100";
+// ignores the option after printing a warning.
+openFile("s3a://bucket/path")
+  .may("fs.s3a.select.sql", "SELECT ...")
+  .get();
 ```
-Now if the error is declared as "line 4", it will be on the select conditions;
-the column offset will begin from the first character on that row.
-
-The SQL statements issued are only included in exceptions if `fs.s3a.select.errors.include.sql`
-is explicitly set to true. This can be done in the application configuration or as an
-`openFile()` option parameter, but it should only be done during development,
-to reduce the risk of logging security or privacy information.
-
-
-### "mid-query" failures on large datasets
-
-S3 Select returns paged results; the source file is _not_ filtered in
-one go in the initial request.
-
-This means that errors related to the content of the data (type casting, etc.)
-may only surface partway through the read. The errors reported in such a
-case may be different from those raised when reading the first page of data,
-where they surface earlier in the read process.
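
To make the development-time debugging advice above concrete, here is a small
sketch; the path is hypothetical, the column name is taken from the example
above, and the option names are those documented earlier. It combines a SQL
statement broken across lines with `fs.s3a.select.errors.include.sql` set as an
`openFile()` option, so that a rejected statement is reported together with its
line and column information.

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DebugSelectErrors {
  public static void main(String[] args) throws Exception {
    // Hypothetical object, used purely for illustration.
    Path csv = new Path("s3a://example-bucket/data/records.csv");

    // Multi-line SQL: parse errors then report a meaningful line number.
    String sql = "SELECT\n"
        + "s.\"cloudCover\"\n"
        + "FROM S3OBJECT s WHERE\n"
        + "s.\"cloudCover\" = '100.0'\n"
        + "LIMIT 100";

    FileSystem fs = csv.getFileSystem(new Configuration());
    try (FSDataInputStream in = fs.openFile(csv)
        .must("fs.s3a.select.sql", sql)
        .must("fs.s3a.select.input.format", "csv")
        .must("fs.s3a.select.output.format", "csv")
        .must("fs.s3a.select.input.csv.header", "use")
        // Development only: include the SQL text in exception messages.
        .must("fs.s3a.select.errors.include.sql", "true")
        .build()
        .get()) {
      // Read the results; exceptions raised at open time or mid-read
      // can now include the SQL text.
      while (in.read() >= 0) {
        // discard the data: this sketch only exercises the query.
      }
    }
  }
}
```
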
- -### External Resources on for troubleshooting - -See: - -* [SELECT Command Reference](https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-glacier-select-sql-reference-select.html) -* [SELECT Object Content](https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html) - -### IOException: "not a gzip file" - -This surfaces when trying to read in data from a `.gz` source file through an MR -or other analytics query, and the gzip codec has tried to parse it. - -``` -java.io.IOException: not a gzip file -at org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.processBasicHeader(BuiltInGzipDecompressor.java:496) -at org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.executeHeaderState(BuiltInGzipDecompressor.java:257) -at org.apache.hadoop.io.compress.zlib.BuiltInGzipDecompressor.decompress(BuiltInGzipDecompressor.java:186) -at org.apache.hadoop.io.compress.DecompressorStream.decompress(DecompressorStream.java:111) -at org.apache.hadoop.io.compress.DecompressorStream.read(DecompressorStream.java:105) -at java.io.InputStream.read(InputStream.java:101) -at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:182) -at org.apache.hadoop.util.LineReader.readCustomLine(LineReader.java:306) -at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174) -at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:158) -at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.nextKeyValue(LineRecordReader.java:198) -``` - -The underlying problem is that the gzip decompressor is automatically enabled -when the source file ends with the ".gz" extension. Because S3 Select -returns decompressed data, the codec fails. - -The workaround here is to declare that the job should add the "Passthrough Codec" -to its list of known decompressors, and that this codec should declare the -file format it supports to be ".gz". - -``` -io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec -io.compress.passthrough.extension = .gz -``` - -### AWSBadRequestException `InvalidColumnIndex` - - -Your SQL is wrong and the element at fault is considered an unknown column -name. - -``` -org.apache.hadoop.fs.s3a.AWSBadRequestException: - Select: SELECT * FROM S3OBJECT WHERE odd = true on test/testSelectOddLines.csv: - com.amazonaws.services.s3.model.AmazonS3Exception: - The column index at line 1, column 30 is invalid. - Please check the service documentation and try again. - (Service: Amazon S3; Status Code: 400; Error Code: InvalidColumnIndex; -``` - -Here it's the first line of the query, column 30. Paste the query -into an editor and position yourself on the line and column at fault. - -```sql -SELECT * FROM S3OBJECT WHERE odd = true - ^ HERE -``` - -Another example: - -``` -org.apache.hadoop.fs.s3a.AWSBadRequestException: Select: -SELECT * FROM S3OBJECT s WHERE s._1 = "true" on test/testSelectOddLines.csv: - com.amazonaws.services.s3.model.AmazonS3Exception: - The column index at line 1, column 39 is invalid. - Please check the service documentation and try again. - (Service: Amazon S3; Status Code: 400; - Error Code: InvalidColumnIndex; -``` - -Here it is because strings must be single quoted, not double quoted. - -```sql -SELECT * FROM S3OBJECT s WHERE s._1 = "true" - ^ HERE -``` - -S3 select uses double quotes to wrap column names, interprets the string -as column "true", and fails with a non-intuitive message. 
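
To make the quoting rule explicit, here is a sketch of the rejected statement
alongside a corrected form, written in the same multi-line string style the SQL
errors section recommends for debugging; the column reference `s._1` is the one
from the example above.

```java
// Rejected: "true" in double quotes is parsed as a column reference.
String badSql = "SELECT * FROM S3OBJECT s WHERE s._1 = \"true\"";

// Corrected: the constant is single-quoted, so it is treated as a string literal.
String goodSql = "SELECT\n"
    + "*\n"
    + "FROM S3OBJECT s WHERE\n"
    + "s._1 = 'true'";
```
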
-
-*Tip*: look for the element at fault and treat the `InvalidColumnIndex`
-message as a parse-time message, rather than the definitive root
-cause of the problem.
-
-### AWSBadRequestException `ParseInvalidPathComponent`
-
-Your SQL is wrong.
-
-```
-org.apache.hadoop.fs.s3a.AWSBadRequestException:
-Select: SELECT * FROM S3OBJECT s WHERE s.'odd' is "true" on test/testSelectOddLines.csv
-: com.amazonaws.services.s3.model.AmazonS3Exception: Invalid Path component,
- expecting either an IDENTIFIER or STAR, got: LITERAL,at line 1, column 34.
- (Service: Amazon S3; Status Code: 400; Error Code: ParseInvalidPathComponent;
-
-```
-
-```
-SELECT * FROM S3OBJECT s WHERE s.'odd' is "true" on test/testSelectOddLines.csv
-                                 ^ HERE
-```
-
-### AWSBadRequestException `ParseExpectedTypeName`
+The `hadoop s3guard select` command is no longer supported.
 
-Your SQL is still wrong.
+Previously, the command would either issue an S3 Select call or fail with an error (exit code 42
+being the one returned when not enough arguments are supplied):
 
 ```
+hadoop s3guard select
+select [OPTIONS] [-limit rows] [-header (use|none|ignore)] [-out path] [-expected rows]
+  [-compression (gzip|bzip2|none)] [-inputformat csv] [-outputformat csv]