From 70380c357d265c6589fa9b14a5bf7aa2f1e5155b Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 21 Jul 2023 16:36:27 +0100 Subject: [PATCH 01/20] HADOOP-18820. Cut AWS v1 support * downgrade v1 sdk to "provided" * only allowed in one package in production code * use isolated classes and reflection games to allow rest of module to work without it when instantiating credential providers. * tests updated as appropriate * remapping of standard aws v1 credential providers to v2 classnames prior to instantiation. Change-Id: I0ab248e71e696b638420c09afc2b141420aaf596 --- hadoop-tools/hadoop-aws/pom.xml | 12 +- .../fs/s3a/AWSCredentialProviderList.java | 51 +--- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 4 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 66 ++++- .../org/apache/hadoop/fs/s3a/Tristate.java | 45 +++- .../fs/s3a/adapter/AwsV1BindingSupport.java | 123 ++++++++++ .../V1ToV2AwsCredentialProviderAdapter.java | 64 ++++- .../V1V2AwsCredentialProviderAdapter.java | 36 --- .../hadoop/fs/s3a/adapter/package-info.java | 4 + .../s3a/auth/AwsCredentialListProvider.java | 225 +++++++++--------- .../fs/s3a/impl/InstantiationIOException.java | 120 ++++++++++ .../s3a/ITestS3AAWSCredentialsProvider.java | 4 +- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 199 ++++++++++------ 13 files changed, 646 insertions(+), 307 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 0731df4daf089..cd9bd6e875e89 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -464,6 +464,16 @@ org.apache.hadoop.mapred.** + + false + Restrict AWS v1 imports to adapter code + + org.apache.hadoop.fs.s3a.adapter.V1ToV2AwsCredentialProviderAdapter + + + com.amazonaws.** + + @@ -497,7 +507,7 @@ com.amazonaws aws-java-sdk-core - compile + provided software.amazon.awssdk diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index 5e98b99966b78..1b780c99b84c2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -21,18 +21,12 @@ import java.io.Closeable; import java.util.ArrayList; import java.util.Collection; -import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.BasicAWSCredentials; -import com.amazonaws.auth.BasicSessionCredentials; import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; @@ -48,7 +42,6 @@ import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsCredentials; import 
software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import software.amazon.awssdk.core.exception.SdkException; /** @@ -106,23 +99,8 @@ public AWSCredentialProviderList() { * @param providers provider list. */ public AWSCredentialProviderList( - Collection providers) { - for (AWSCredentialsProvider provider: providers) { - this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); - } - } - - /** - * Create with an initial list of providers. - * @param name name for error messages, may be "" - * @param providerArgs provider list. - */ - public AWSCredentialProviderList(final String name, - final AWSCredentialsProvider... providerArgs) { - setName(name); - for (AWSCredentialsProvider provider: providerArgs) { - this.providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); - } + Collection providers) { + this.providers.addAll(providers); } /** @@ -133,7 +111,7 @@ public AWSCredentialProviderList(final String name, public AWSCredentialProviderList(final String name, final AwsCredentialsProvider... providerArgs) { setName(name); - Collections.addAll(providers, providerArgs); + providers.forEach(this::add); } /** @@ -148,14 +126,6 @@ public void setName(final String name) { } } - /** - * Add a new provider. - * @param provider provider - */ - public void add(AWSCredentialsProvider provider) { - providers.add(V1V2AwsCredentialProviderAdapter.adapt(provider)); - } - /** * Add a new SDK V2 provider. * @param provider provider @@ -173,21 +143,6 @@ public void addAll(AWSCredentialProviderList other) { providers.addAll(other.providers); } - /** - * This method will get credentials using SDK V2's resolveCredentials and then convert it into - * V1 credentials. This required by delegation token binding classes. - * @return SDK V1 credentials - */ - public AWSCredentials getCredentials() { - AwsCredentials credentials = resolveCredentials(); - if (credentials instanceof AwsSessionCredentials) { - return new BasicSessionCredentials(credentials.accessKeyId(), - credentials.secretAccessKey(), - ((AwsSessionCredentials) credentials).sessionToken()); - } else { - return new BasicAWSCredentials(credentials.accessKeyId(), credentials.secretAccessKey()); - } - } /** * Iterate through the list of providers, to find one with credentials. 
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 7416aa1fd113d..9fe3163dbf73a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -230,7 +230,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.INITIALIZE_SPAN; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderSet; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderList; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.STATEMENT_ALLOW_SSE_KMS_RW; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3Operations; import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.TokenIssuingPolicy.NoTokensAvailable; @@ -952,7 +952,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { uaSuffix = tokens.getUserAgentField(); } else { // DT support is disabled, so create the normal credential chain - credentials = createAWSCredentialProviderSet(name, conf); + credentials = createAWSCredentialProviderList(name, conf); } LOG.debug("Using credential provider {}", credentials); Class s3ClientFactoryClass = conf.getClass( diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 8dbe3f12a6e2b..e2cad4d1a2689 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -75,6 +75,10 @@ import static org.apache.commons.lang3.StringUtils.isEmpty; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.impl.ErrorTranslation.isUnknownBucket; +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.instantiationException; +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.isAbstract; +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.isNotInstanceOf; +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.unsupportedConstructor; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.*; import static org.apache.hadoop.io.IOUtils.cleanupWithLogger; import static org.apache.hadoop.util.functional.RemoteIterators.filteringRemoteIterator; @@ -89,9 +93,6 @@ public final class S3AUtils { private static final Logger LOG = LoggerFactory.getLogger(S3AUtils.class); - static final String CONSTRUCTOR_EXCEPTION = "constructor exception"; - static final String INSTANTIATION_EXCEPTION - = "instantiation exception"; static final String ENDPOINT_KEY = "Endpoint"; @@ -562,15 +563,57 @@ public static long dateToLong(final Date date) { * @return instance of the specified class * @throws IOException on any problem */ - @SuppressWarnings("unchecked") public static InstanceT getInstanceFromReflection(Class instanceClass, - Configuration conf, @Nullable URI uri, Class interfaceImplemented, String methodName, + Configuration conf, @Nullable URI uri, Class interfaceImplemented, String methodName, String configKey) throws IOException { String className = instanceClass.getName(); + return 
getInstanceFromReflection(className,conf, uri ,interfaceImplemented, methodName,configKey);
+ }
+
+ /***
+ * Creates an instance of a class using reflection. The
+ * class must implement one of the following means of construction, which are
+ * attempted in order:
+ *
+ * <ol>
+ * <li>a public constructor accepting java.net.URI and
+ * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public constructor accepting
+ * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public static method named as per methodName, that accepts no
+ * arguments and returns an instance of
+ * specified type, or</li>
+ * <li>a public default constructor.</li>
+ * </ol>
+ *
+ * @param className name of class for which instance is to be created
+ * @param conf configuration
+ * @param uri URI of the FS
+ * @param interfaceImplemented interface that this class implements
+ * @param methodName name of factory method to be invoked
+ * @param configKey config key under which this class is specified
+ * @param <InstanceT> Instance of class
+ * @return instance of the specified class
+ * @throws IOException on any problem
+ */
+ @SuppressWarnings("unchecked")
+ public static <InstanceT> InstanceT getInstanceFromReflection(String className,
+ Configuration conf,
+ @Nullable URI uri,
+ Class<? extends InstanceT> interfaceImplemented,
+ String methodName,
+ String configKey) throws IOException {
 try {
- Constructor cons = null;
+ Class<?> instanceClass = S3AUtils.class.getClassLoader().loadClass(className);
+ if (Modifier.isAbstract(instanceClass.getModifiers())) {
+ throw isAbstract(className, configKey);
+ }
+ if (!interfaceImplemented.isAssignableFrom(instanceClass)) {
+ throw isNotInstanceOf(className, interfaceImplemented.getName(), configKey);
+
+ }
+ Constructor cons;
 if (conf != null) {
 // new X(uri, conf)
 cons = getConstructor(instanceClass, URI.class, Configuration.class);
@@ -598,10 +641,7 @@ public static InstanceT getInstanceFromReflection(Class instanceC
 }
 // no supported constructor or factory method found
- throw new IOException(String.format("%s " + CONSTRUCTOR_EXCEPTION
- + ". A class specified in %s must provide a public constructor "
- + "of a supported signature, or a public factory method named "
- + "create that accepts no arguments.", className, configKey));
+ throw unsupportedConstructor(className, configKey);
 } catch (InvocationTargetException e) {
 Throwable targetException = e.getTargetException();
 if (targetException == null) {
@@ -613,13 +653,13 @@ public static InstanceT getInstanceFromReflection(Class instanceC
 throw translateException("Instantiate " + className, "",
 (SdkException) targetException);
 } else {
 // supported constructor or factory method found, but the call failed
- throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + targetException,
- targetException);
+ throw instantiationException(className , configKey, targetException);
 }
 } catch (ReflectiveOperationException | IllegalArgumentException e) {
 // supported constructor or factory method found, but the call failed
- throw new IOException(className + " " + INSTANTIATION_EXCEPTION + ": " + e, e);
+ throw instantiationException(className , configKey, e);
 }
+
 }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java
index 0462ccfd7cbbd..44ddcda522040 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java
@@ -18,15 +18,58 @@
 package org.apache.hadoop.fs.s3a;
+import java.util.Optional;
+
+import static java.util.Optional.empty;
+import static java.util.Optional.of;
+
 /**
 * Simple enum to express {true, false, don't know}.
 */
 public enum Tristate {
+
 // Do not add additional values here. Logic will assume there are exactly
 // three possibilities.
- TRUE, FALSE, UNKNOWN;
+ TRUE(of(Boolean.TRUE)),
+ FALSE(of(Boolean.FALSE)),
+ UNKNOWN(empty());
+
+ /**
+ * Mapping to an optional boolean.
+ */
+ @SuppressWarnings("NonSerializableFieldInSerializableClass")
+ private final Optional<Boolean> mapping;
+
+ Tristate(final Optional<Boolean> t) {
+ mapping = t;
+ }
+
+ /**
+ * Get the boolean mapping, if present.
+ * @return the boolean value, if present.
+ */
+ public Optional<Boolean> getMapping() {
+ return mapping;
+ }
+
+ /**
+ * Does this value map to a boolean.
+ * @return true if the state is one of true or false.
+ */
+ public boolean isBoolean() {
+ return mapping.isPresent();
+ }

 public static Tristate fromBool(boolean v) {
 return v ? TRUE : FALSE;
 }
+
+ /**
+ * Build a tristate from a boolean.
+ * @param b source optional
+ * @return a tristate derived from the argument.
+ */
+ public static Tristate from(Optional<Boolean> b) {
+ return b.map(Tristate::fromBool).orElse(UNKNOWN);
+ }
 }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java
new file mode 100644
index 0000000000000..5916825f03e52
--- /dev/null
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.s3a.adapter;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Optional;
+import javax.annotation.Nullable;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.s3a.Tristate;
+
+/**
+ * Binding support; the sole way through which the rest of the code
+ * should instantiate v1 SDK libraries.
+ * Uses this class's classloader for its analysis/loading.
+ */
+@SuppressWarnings("StaticNonFinalField")
+public class AwsV1BindingSupport {
+
+ private static final Logger LOG = LoggerFactory.getLogger(
+ AwsV1BindingSupport.class);
+
+ public static final String CREDENTIAL_PROVIDER_CLASSNAME =
+ "com.amazonaws.auth.AWSCredentialsProvider";
+
+ public static final String NOT_AWS_PROVIDER =
+ "does not implement AWSCredentialsProvider";
+
+ public static final String NOT_AWS_V2_PROVIDER =
+ "does not implement AwsCredentialsProvider";
+
+ public static final String ABSTRACT_PROVIDER =
+ "is abstract and therefore cannot be created";
+
+ /**
+ * Track availability.
+ */
+ private static Tristate sdkAvailability = Tristate.UNKNOWN;
+
+ @SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized")
+ private static Class<?> credentialProviderClass;
+
+ static {
+ isAwsV1SdkAvailable();
+ }
+
+ /**
+ * Is the AWS v1 SDK available?
+ * The probe is made once, in this class's classloader, and cached.
+ * @return true if it was found in the classloader
+ */
+ public static synchronized boolean isAwsV1SdkAvailable() {
+
+ final Optional<Boolean> mapping = sdkAvailability.getMapping();
+ if (mapping.isPresent()) {
+ return mapping.get();
+ }
+ // no binding, so calculate it once.
+ try {
+ ClassLoader cl = AwsV1BindingSupport.class.getClassLoader();
+ credentialProviderClass = cl.loadClass(CREDENTIAL_PROVIDER_CLASSNAME);
+ LOG.debug("v1 SDK class {} found", CREDENTIAL_PROVIDER_CLASSNAME);
+ sdkAvailability = Tristate.TRUE;
+ } catch (Exception e) {
+ LOG.debug("v1 SDK class {} not found", CREDENTIAL_PROVIDER_CLASSNAME, e);
+ sdkAvailability = Tristate.FALSE;
+ }
+ // guaranteed to be non-empty
+ return sdkAvailability.getMapping().get();
+ }
+
+ /**
+ * Create an AWS credential provider from its class by using reflection. The
+ * class must implement one of the following means of construction, which are
+ * attempted in order:
+ *
+ * <ol>
+ * <li>a public constructor accepting java.net.URI and
+ * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public constructor accepting
+ * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public static method named getInstance that accepts no
+ * arguments and returns an instance of
+ * com.amazonaws.auth.AWSCredentialsProvider, or</li>
+ * <li>a public default constructor.</li>
+ * </ol>
+ * @param conf configuration + * @param className credential classname + * @param uri URI of the FS + * @return the instantiated class + * @throws IOException on any instantiation failure, including v1 SDK not found. + */ + public static AwsCredentialsProvider createAWSV1CredentialProvider( + Configuration conf, + String className, + @Nullable URI uri) throws IOException { + if (!isAwsV1SdkAvailable()) { + throw new IOException("No AWS v1 SDK available; unable to load " + className); + } + return V1ToV2AwsCredentialProviderAdapter.create(conf, className, uri); + + } +} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java index 242a29fe21396..a57f73b265555 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -18,6 +18,10 @@ package org.apache.hadoop.fs.s3a.adapter; +import java.io.IOException; +import java.net.URI; +import javax.annotation.Nullable; + import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSSessionCredentials; @@ -28,16 +32,22 @@ import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.S3AUtils; + +import static java.util.Objects.requireNonNull; +import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; + /** * Adapts a V1 {@link AWSCredentialsProvider} to the V2 {@link AwsCredentialsProvider} interface. - * Implements both interfaces so can be used with either the V1 or V2 AWS SDK. */ -final class V1ToV2AwsCredentialProviderAdapter implements V1V2AwsCredentialProviderAdapter { +public final class V1ToV2AwsCredentialProviderAdapter implements AwsCredentialsProvider { private final AWSCredentialsProvider v1CredentialsProvider; + private V1ToV2AwsCredentialProviderAdapter(AWSCredentialsProvider v1CredentialsProvider) { - this.v1CredentialsProvider = v1CredentialsProvider; + this.v1CredentialsProvider = requireNonNull(v1CredentialsProvider); } @Override @@ -55,20 +65,52 @@ public AwsCredentials resolveCredentials() { } @Override - public AWSCredentials getCredentials() { - return v1CredentialsProvider.getCredentials(); - } - - @Override - public void refresh() { - v1CredentialsProvider.refresh(); + public String toString() { + return "V1ToV2AwsCredentialProviderAdapter{" + + "v1CredentialsProvider=" + v1CredentialsProvider + + '}'; } /** * @param v1CredentialsProvider V1 credential provider to adapt. * @return A new instance of the credentials provider adapter. */ - static V1ToV2AwsCredentialProviderAdapter create(AWSCredentialsProvider v1CredentialsProvider) { + static AwsCredentialsProvider create(AWSCredentialsProvider v1CredentialsProvider) { return new V1ToV2AwsCredentialProviderAdapter(v1CredentialsProvider); } + + /** + * Create an AWS credential provider from its class by using reflection. The + * class must implement one of the following means of construction, which are + * attempted in order: + * + *
+ * <ol>
+ * <li>a public constructor accepting java.net.URI and
+ * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public constructor accepting
+ * org.apache.hadoop.conf.Configuration</li>
+ * <li>a public static method named getInstance that accepts no
+ * arguments and returns an instance of
+ * com.amazonaws.auth.AWSCredentialsProvider, or</li>
+ * <li>a public default constructor.</li>
+ * </ol>
+ * @param conf configuration + * @param className classname + * @param uri URI of the FS + * @return the instantiated class + * @throws IOException on any instantiation failure. + */ + static AwsCredentialsProvider create( + Configuration conf, + String className, + @Nullable URI uri) throws IOException { + + + final AWSCredentialsProvider instance = + S3AUtils.getInstanceFromReflection(className, conf, uri, AWSCredentialsProvider.class, + "getInstance", AWS_CREDENTIALS_PROVIDER); + return create(instance); + + } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java deleted file mode 100644 index f27166a9ef91d..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1V2AwsCredentialProviderAdapter.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a.adapter; - -import com.amazonaws.auth.AWSCredentialsProvider; -import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; - -public interface V1V2AwsCredentialProviderAdapter extends AWSCredentialsProvider, - AwsCredentialsProvider { - - /** - * Creates a two-way adapter from a V1 {@link AWSCredentialsProvider} interface. - * - * @param v1CredentialsProvider V1 credentials provider. - * @return Two-way credential provider adapter. - */ - static V1V2AwsCredentialProviderAdapter adapt(AWSCredentialsProvider v1CredentialsProvider) { - return V1ToV2AwsCredentialProviderAdapter.create(v1CredentialsProvider); - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java index 8d03c915e171a..124534188bccc 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/package-info.java @@ -18,6 +18,10 @@ /** * Adapter classes for allowing V1 credential providers to be used with SDKV2. 
+ * This is the only package where use of aws v1 classes are permitted; + * all instantiations of objects here must use reflection to probe for + * availability or be prepared to catch exceptions which may be raised + * if the v1 SDK isn't found on the classpath */ @InterfaceAudience.Private @InterfaceStability.Unstable diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java index d94f8c25bad6b..420d13a9e5911 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java @@ -19,18 +19,18 @@ package org.apache.hadoop.fs.s3a.auth; import java.io.IOException; -import java.lang.reflect.Modifier; import java.net.URI; import java.util.Arrays; +import java.util.Collection; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.stream.Collectors; import javax.annotation.Nullable; -import com.amazonaws.auth.AWSCredentialsProvider; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; @@ -39,31 +39,30 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider; import org.apache.hadoop.fs.s3a.Constants; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider; import org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider; +import org.apache.hadoop.fs.s3a.adapter.AwsV1BindingSupport; +import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.fs.s3native.S3xLoginHelper; +import org.apache.hadoop.fs.store.LogExactlyOnce; -import static org.apache.hadoop.fs.s3a.Constants.AWS_AUTH_CLASS_PREFIX; import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; +import static org.apache.hadoop.fs.s3a.adapter.AwsV1BindingSupport.isAwsV1SdkAvailable; /** * This class provides methods to create the list of AWS credential providers. */ public final class AwsCredentialListProvider { - private AwsCredentialListProvider() { - } - private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class); - public static final String NOT_AWS_PROVIDER = - "does not implement AWSCredentialsProvider"; - public static final String NOT_AWS_V2_PROVIDER = - "does not implement AwsCredentialsProvider"; - public static final String ABSTRACT_PROVIDER = - "is abstract and therefore cannot be created"; + /** + * A v1 entry has been remapped. warn once about this and then shut up. 
+ */ + private static final LogExactlyOnce LOG_REMAPPED_ENTRY = new LogExactlyOnce(LOG); /** * Error message when the AWS provider list built up contains a forbidden @@ -84,6 +83,24 @@ private AwsCredentialListProvider() { EnvironmentVariableCredentialsProvider.class, IAMInstanceCredentialsProvider.class)); + public static final String V1_ENVIRONMENT_CREDENTIALS = + "com.amazonaws.auth.EnvironmentVariableCredentialsProvider"; + + public static final String V1_EC2_CONTAINER_CREDENTIALS = + "com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper"; + + public static final String V1_EC2_IAM_CREDENTIALS = + "com.amazonaws.auth.InstanceProfileCredentialsProvider"; + + public static final String V1_ANONYMOUS_CREDENTIALS = + "com.amazonaws.auth.AnonymousAWSCredentials"; + + public static final String V2_EC2_IAM_CREDENTIALS = + IAMInstanceCredentialsProvider.class.getName(); + + public static final String V2_ENVIRONMENT_CREDENTIALS = + EnvironmentVariableCredentialsProvider.class.getName(); + /** * Create the AWS credentials from the providers, the URI and * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration. @@ -93,7 +110,7 @@ private AwsCredentialListProvider() { * @throws IOException Problems loading the providers (including reading * secrets from credential files). */ - public static AWSCredentialProviderList createAWSCredentialProviderSet( + public static AWSCredentialProviderList createAWSCredentialProviderList( @Nullable URI binding, Configuration conf) throws IOException { // this will reject any user:secret entries in the URI @@ -115,32 +132,36 @@ public static AWSCredentialProviderList createAWSCredentialProviderSet( * @param conf configuration * @param key key * @param defaultValue list of default values - * @return the list of classes, possibly empty + * @return the list of classes, empty if the default list is empty and + * there was no match for the key in the configuration. * @throws IOException on a failure to load the list. */ - private static List> loadAWSProviderClasses(Configuration conf, + private static Collection loadAWSProviderClasses(Configuration conf, String key, Class... defaultValue) throws IOException { - try { - return Arrays.asList(conf.getClasses(key, defaultValue)); - } catch (RuntimeException e) { - Throwable c = e.getCause() != null ? e.getCause() : e; - throw new IOException("From option " + key + ' ' + c, c); + final Collection classnames = conf.getTrimmedStringCollection(key); + if (classnames.isEmpty()) { + // empty list; return the defaults + return Arrays.stream(defaultValue).map(c -> c.getName()).collect(Collectors.toList()); + } else { + return classnames; } } /** * Maps V1 credential providers to either their equivalent SDK V2 class or hadoop provider. 
*/ - private static Map initCredentialProvidersMap() { - Map v1v2CredentialProviderMap = new HashMap<>(); + private static Map initCredentialProvidersMap() { + Map v1v2CredentialProviderMap = new HashMap<>(); - v1v2CredentialProviderMap.put("EnvironmentVariableCredentialsProvider", - EnvironmentVariableCredentialsProvider.class); - v1v2CredentialProviderMap.put("EC2ContainerCredentialsProviderWrapper", - IAMInstanceCredentialsProvider.class); - v1v2CredentialProviderMap.put("InstanceProfileCredentialsProvider", - IAMInstanceCredentialsProvider.class); + v1v2CredentialProviderMap.put(V1_ENVIRONMENT_CREDENTIALS, + V2_ENVIRONMENT_CREDENTIALS); + v1v2CredentialProviderMap.put(V1_EC2_CONTAINER_CREDENTIALS, + V2_EC2_IAM_CREDENTIALS); + v1v2CredentialProviderMap.put(V1_EC2_IAM_CREDENTIALS, + V2_EC2_IAM_CREDENTIALS); + v1v2CredentialProviderMap.put(V1_ANONYMOUS_CREDENTIALS, + AnonymousAWSCredentialsProvider.NAME); return v1v2CredentialProviderMap; } @@ -164,120 +185,88 @@ public static AWSCredentialProviderList buildAWSProviderList( final Set> forbidden) throws IOException { // build up the base provider - List> awsClasses = loadAWSProviderClasses(conf, + Collection awsClasses = loadAWSProviderClasses(conf, key, defaultValues.toArray(new Class[defaultValues.size()])); - Map v1v2CredentialProviderMap = initCredentialProvidersMap(); - // and if the list is empty, switch back to the defaults. - // this is to address the issue that configuration.getClasses() - // doesn't return the default if the config value is just whitespace. - if (awsClasses.isEmpty()) { - awsClasses = defaultValues; - } - // iterate through, checking for blacklists and then instantiating + Map v1v2CredentialProviderMap = initCredentialProvidersMap(); + + // iterate through, checking for forbidden values and then instantiating // each provider AWSCredentialProviderList providers = new AWSCredentialProviderList(); - for (Class aClass : awsClasses) { + for (String className : awsClasses) { + if (v1v2CredentialProviderMap.containsKey(className)) { + // mapping - if (forbidden.contains(aClass)) { + final String mapped = v1v2CredentialProviderMap.get(className); + LOG_REMAPPED_ENTRY.warn("Credentials option {} contains AWS v1 SDK entry {}; mapping to {}", + key, className, mapped); + className = mapped; + } + // now scan the forbidden list. 
doing this after any mappings ensures the v1 names
+ // are also blocked
+ if (forbidden.contains(className)) {
 throw new IOException(E_FORBIDDEN_AWS_PROVIDER
- + " in option " + key + ": " + aClass);
+ + " in option " + key + ": " + className);
 }
- if (v1v2CredentialProviderMap.containsKey(aClass.getSimpleName()) &&
- aClass.getName().contains(AWS_AUTH_CLASS_PREFIX)){
- providers.add(createAWSV2CredentialProvider(conf,
- v1v2CredentialProviderMap.get(aClass.getSimpleName()), binding));
- } else if (AWSCredentialsProvider.class.isAssignableFrom(aClass)) {
- providers.add(createAWSV1CredentialProvider(conf,
- aClass, binding));
- } else {
- providers.add(createAWSV2CredentialProvider(conf, aClass, binding));
+ try {
+ providers.add(createAWSV2CredentialProvider(conf, className, binding));
+ } catch (InstantiationIOException e) {
+ // failed to create a v2; try to see if it is a v1
+ if (e.getKind() == InstantiationIOException.Kind.IsNotImplementation) {
+ if (isAwsV1SdkAvailable()) {
+ // try to create v1
+ LOG.debug("Failed to create {} as v2 credentials, trying to instantiate as v1",
+ className);
+ try {
+ final AwsCredentialsProvider provider =
+ AwsV1BindingSupport.createAWSV1CredentialProvider(conf, className, binding);
+ LOG_REMAPPED_ENTRY.warn("Credentials option {} contains AWS v1 SDK entry {}",
+ key, className);
+ providers.add(provider);
+ } catch (InstantiationIOException ex) {
+ // if it is something other than non-implementation, throw.
+ // that way, non-impl messages are about v2 not v1 in the error
+ if (ex.getKind() != InstantiationIOException.Kind.IsNotImplementation) {
+ throw ex;
+ } else {
+ throw e;
+ }
+ }
+ } else {
+ LOG.warn("Failed to instantiate {} as AWS v2 SDK credential provider;"
+ + " AWS V1 SDK is not on the classpath so unable to attempt to"
+ + " instantiate as a v1 provider", className, e);
+ throw e;
+ }
+ } else {
+ // any other problem
+ throw e;
+
+ }
+ }
 }
 }
 return providers;
 }
- /**
- * Create an AWS credential provider from its class by using reflection. The
- * class must implement one of the following means of construction, which are
- * attempted in order:
- *
- * <ol>
- * <li>a public constructor accepting java.net.URI and
- * org.apache.hadoop.conf.Configuration</li>
- * <li>a public constructor accepting
- * org.apache.hadoop.conf.Configuration</li>
- * <li>a public static method named getInstance that accepts no
- * arguments and returns an instance of
- * com.amazonaws.auth.AWSCredentialsProvider, or</li>
- * <li>a public default constructor.</li>
- * </ol>
- *
- * @param conf configuration
- * @param credClass credential class
- * @param uri URI of the FS
- * @return the instantiated class
- * @throws IOException on any instantiation failure.
- */
- private static AWSCredentialsProvider createAWSV1CredentialProvider(Configuration conf,
- Class credClass, @Nullable URI uri) throws IOException {
- AWSCredentialsProvider credentials = null;
- String className = credClass.getName();
- if (!AWSCredentialsProvider.class.isAssignableFrom(credClass)) {
- throw new IOException("Class " + credClass + " " + NOT_AWS_PROVIDER);
- }
- if (Modifier.isAbstract(credClass.getModifiers())) {
- throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER);
- }
- LOG.debug("Credential provider class is {}", className);
-
- credentials =
- S3AUtils.getInstanceFromReflection(credClass, conf, uri, AWSCredentialsProvider.class,
- "getInstance", AWS_CREDENTIALS_PROVIDER);
- return credentials;
-
- }
 /**
- * Create an AWS credential provider from its class by using reflection. The
- * class must implement one of the following means of construction, which are
- * attempted in order:
- *
- * <ol>
- * <li>a public constructor accepting java.net.URI and
- * org.apache.hadoop.conf.Configuration</li>
- * <li>a public constructor accepting
- * org.apache.hadoop.conf.Configuration</li>
- * <li>a public static method named getInstance that accepts no
- * arguments and returns an instance of
- * software.amazon.awssdk.auth.credentials.AwsCredentialsProvider, or</li>
- * <li>a public default constructor.</li>
- * </ol>
- * + * Create an AWS v2 credential provider from its class by using reflection. * @param conf configuration - * @param credClass credential class + * @param className credential class name * @param uri URI of the FS * @return the instantiated class * @throws IOException on any instantiation failure. + * @see S3AUtils#getInstanceFromReflection */ private static AwsCredentialsProvider createAWSV2CredentialProvider(Configuration conf, - Class credClass, @Nullable URI uri) throws IOException { - AwsCredentialsProvider credentials = null; - String className = credClass.getName(); - if (!AwsCredentialsProvider.class.isAssignableFrom(credClass)) { - throw new IOException("Class " + credClass + " " + NOT_AWS_V2_PROVIDER); - } - if (Modifier.isAbstract(credClass.getModifiers())) { - throw new IOException("Class " + credClass + " " + ABSTRACT_PROVIDER); - } + String className, + @Nullable URI uri) throws IOException { LOG.debug("Credential provider class is {}", className); - credentials = - S3AUtils.getInstanceFromReflection(credClass, conf, uri, AwsCredentialsProvider.class, - "create", AWS_CREDENTIALS_PROVIDER); - return credentials; + return S3AUtils.getInstanceFromReflection(className, conf, uri, AwsCredentialsProvider.class, + "create", AWS_CREDENTIALS_PROVIDER); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java new file mode 100644 index 0000000000000..94e8be7100f9d --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a.impl; + +import java.io.IOException; + +/** + * An instantiation exception raised during reflection-based creation + * of classes. + * Uses an enum of kind so tests/code can examine it, without + * creating a full hierarchy of exception classes. + */ +public class InstantiationIOException extends IOException { + + public static final String ABSTRACT_PROVIDER = + "is abstract and therefore cannot be created"; + + public static final String CONSTRUCTOR_EXCEPTION = "constructor exception"; + + public static final String INSTANTIATION_EXCEPTION + = "instantiation exception"; + + public static final String DOES_NOT_IMPLEMENT + = "does not implement"; + + /** + * Exception kind. + */ + private final Kind kind; + + /** + * Class being instantiated. + */ + private final String classname; + + /** + * key used. 
+ */ + private final String key; + + + public InstantiationIOException( + final Kind kind, + final String classname, + final String key, + final String message, + final Throwable cause) { + super("Class " + classname + " " + message + + (key != null ? (" (configuration key " + key + ")") : ""), + cause); + this.kind = kind; + this.classname = classname; + this.key = key; + } + + public String getClassname() { + return classname; + } + + public Kind getKind() { + return kind; + } + + public String getKey() { + return key; + } + + public static InstantiationIOException isAbstract(String classname, String key) { + return new InstantiationIOException(Kind.IsAbstract, classname, key, ABSTRACT_PROVIDER, null); + } + + public static InstantiationIOException isNotInstanceOf(String classname, + String interfaceName, + String key) { + return new InstantiationIOException(Kind.IsNotImplementation, classname, + key, DOES_NOT_IMPLEMENT + " " + interfaceName, null); + } + + + public static InstantiationIOException unsupportedConstructor(String classname, String key) { + return new InstantiationIOException(Kind.UnsupportedConstructor, + classname, key, CONSTRUCTOR_EXCEPTION, null); + } + + + public static InstantiationIOException instantiationException(String classname, + String key, + Throwable t) { + return new InstantiationIOException(Kind.InstantiationFailure, + classname, key, INSTANTIATION_EXCEPTION + " " + t, t); + } + + /** + * An (extensible) enum of kinds. + */ + public enum Kind { + IsAbstract, + UnsupportedConstructor, + IsNotImplementation, + InstantiationFailure, + Other, + } + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 6d1b10954e7c5..690607a530f7a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -26,6 +26,7 @@ import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.test.GenericTestUtils; import org.junit.Rule; @@ -41,7 +42,6 @@ import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.getCSVTestPath; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; -import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.DELEGATION_TOKEN_BINDING; import static org.junit.Assert.*; @@ -98,7 +98,7 @@ public void testBadCredentialsConstructor() throws Exception { try { createFailingFS(conf); } catch (IOException e) { - GenericTestUtils.assertExceptionContains(CONSTRUCTOR_EXCEPTION, e); + GenericTestUtils.assertExceptionContains(InstantiationIOException.CONSTRUCTOR_EXCEPTION, e); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index 1eee096180b16..c5281dbf6968e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -30,39 +30,43 @@ import 
java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import javax.annotation.Nullable; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; -import com.amazonaws.auth.ContainerCredentialsProvider; +import org.assertj.core.api.Assertions; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; - -import org.apache.hadoop.fs.s3a.adapter.V1V2AwsCredentialProviderAdapter; -import org.apache.hadoop.util.Sets; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.s3a.adapter.V1ToV2AwsCredentialProviderAdapter; import org.apache.hadoop.fs.s3a.auth.AbstractSessionCredentialsProvider; import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider; +import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; +import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.io.retry.RetryPolicy; +import org.apache.hadoop.util.Sets; + import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; -import static org.apache.hadoop.fs.s3a.S3AUtils.*; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ABSTRACT_PROVIDER; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.NOT_AWS_V2_PROVIDER; import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.V1_ANONYMOUS_CREDENTIALS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.V1_EC2_CONTAINER_CREDENTIALS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.V1_ENVIRONMENT_CREDENTIALS; import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderSet; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderList; +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.DOES_NOT_IMPLEMENT; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; import static org.junit.Assert.*; @@ -84,13 +88,12 @@ public class TestS3AAWSCredentialsProvider { @Test public void testProviderWrongClass() throws Exception { expectProviderInstantiationFailure(this.getClass(), - NOT_AWS_V2_PROVIDER); - } + DOES_NOT_IMPLEMENT + " software.amazon.awssdk.auth.credentials.AwsCredentialsProvider");} @Test public void testProviderAbstractClass() throws Exception { expectProviderInstantiationFailure(AbstractProvider.class, - ABSTRACT_PROVIDER); + InstantiationIOException.ABSTRACT_PROVIDER); } @Test @@ -103,14 +106,14 @@ public void testProviderNotAClass() throws Exception { public void testProviderConstructorError() throws Exception { expectProviderInstantiationFailure( 
ConstructorSignatureErrorProvider.class, - CONSTRUCTOR_EXCEPTION); + InstantiationIOException.CONSTRUCTOR_EXCEPTION); } @Test public void testProviderFailureError() throws Exception { expectProviderInstantiationFailure( ConstructorFailureProvider.class, - INSTANTIATION_EXCEPTION); + InstantiationIOException.INSTANTIATION_EXCEPTION); } @Test @@ -122,7 +125,7 @@ public void testInstantiationChain() throws Throwable { + " ,\n " + AnonymousAWSCredentialsProvider.NAME); Path testFile = getCSVTestPath(conf); - AWSCredentialProviderList list = createAWSCredentialProviderSet( + AWSCredentialProviderList list = createAWSCredentialProviderList( testFile.toUri(), conf); List> expectedClasses = Arrays.asList( @@ -138,9 +141,9 @@ public void testDefaultChain() throws Exception { Configuration conf = new Configuration(false); // use the default credential provider chain conf.unset(AWS_CREDENTIALS_PROVIDER); - AWSCredentialProviderList list1 = createAWSCredentialProviderSet( + AWSCredentialProviderList list1 = createAWSCredentialProviderList( uri1, conf); - AWSCredentialProviderList list2 = createAWSCredentialProviderSet( + AWSCredentialProviderList list2 = createAWSCredentialProviderList( uri2, conf); List> expectedClasses = STANDARD_AWS_PROVIDERS; assertCredentialProviders(expectedClasses, list1); @@ -153,28 +156,43 @@ public void testDefaultChainNoURI() throws Exception { // use the default credential provider chain conf.unset(AWS_CREDENTIALS_PROVIDER); assertCredentialProviders(STANDARD_AWS_PROVIDERS, - createAWSCredentialProviderSet(null, conf)); + createAWSCredentialProviderList(null, conf)); } @Test - public void testConfiguredChainV1V2() throws Exception { - URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); - List> credentialProviders = + public void testV1V2Mapping() throws Exception { + URI uri1 = new URI("s3a://bucket1"); + + List> expectedClasses = Arrays.asList( - ContainerCredentialsProvider.class, - AnonymousAWSCredentialsProvider.class); + IAMInstanceCredentialsProvider.class, + AnonymousAWSCredentialsProvider.class, + EnvironmentVariableCredentialsProvider.class); + Configuration conf = + createProviderConfiguration(buildClassList( + V1_EC2_CONTAINER_CREDENTIALS, + V1_ANONYMOUS_CREDENTIALS, + V1_ENVIRONMENT_CREDENTIALS)); + AWSCredentialProviderList list1 = createAWSCredentialProviderList( + uri1, conf); + assertCredentialProviders(expectedClasses, list1); + } + + @Test + public void testV1Wrapping() throws Exception { + URI uri1 = new URI("s3a://bucket1"); + List> expectedClasses = Arrays.asList( - V1V2AwsCredentialProviderAdapter.class, - AnonymousAWSCredentialsProvider.class); + V1ToV2AwsCredentialProviderAdapter.class, + V1ToV2AwsCredentialProviderAdapter.class); Configuration conf = - createProviderConfiguration(buildClassListString(credentialProviders)); - AWSCredentialProviderList list1 = createAWSCredentialProviderSet( + createProviderConfiguration(buildClassList( + LegacyV1CredentialProvider.class.getName(), + LegacyV1CredentialProviderWithConf.class.getName())); + AWSCredentialProviderList list1 = createAWSCredentialProviderList( uri1, conf); - AWSCredentialProviderList list2 = createAWSCredentialProviderSet( - uri2, conf); assertCredentialProviders(expectedClasses, list1); - assertCredentialProviders(expectedClasses, list2); } @Test @@ -182,14 +200,15 @@ public void testConfiguredChain() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); List> expectedClasses = Arrays.asList( - 
EnvironmentVariableCredentialsProvider.class, - InstanceProfileCredentialsProvider.class, - AnonymousAWSCredentialsProvider.class); + IAMInstanceCredentialsProvider.class, + AnonymousAWSCredentialsProvider.class, + EnvironmentVariableCredentialsProvider.class + ); Configuration conf = createProviderConfiguration(buildClassListString(expectedClasses)); - AWSCredentialProviderList list1 = createAWSCredentialProviderSet( + AWSCredentialProviderList list1 = createAWSCredentialProviderList( uri1, conf); - AWSCredentialProviderList list2 = createAWSCredentialProviderSet( + AWSCredentialProviderList list2 = createAWSCredentialProviderList( uri2, conf); assertCredentialProviders(expectedClasses, list1); assertCredentialProviders(expectedClasses, list2); @@ -203,9 +222,9 @@ public void testConfiguredChainUsesSharedInstanceProfile() throws Exception { Arrays.asList( InstanceProfileCredentialsProvider.class); conf.set(AWS_CREDENTIALS_PROVIDER, buildClassListString(expectedClasses)); - AWSCredentialProviderList list1 = createAWSCredentialProviderSet( + AWSCredentialProviderList list1 = createAWSCredentialProviderList( uri1, conf); - AWSCredentialProviderList list2 = createAWSCredentialProviderSet( + AWSCredentialProviderList list2 = createAWSCredentialProviderList( uri2, conf); assertCredentialProviders(expectedClasses, list1); assertCredentialProviders(expectedClasses, list2); @@ -225,30 +244,37 @@ public void testFallbackToDefaults() throws Throwable { } + private String buildClassList(Class... classes) { + return Arrays.stream(classes) + .map(Class::getCanonicalName) + .collect(Collectors.joining(",")); + } + private String buildClassList(String... classes) { + return Arrays.stream(classes) + .collect(Collectors.joining(",")); + } + /** * A credential provider declared as abstract, so it cannot be instantiated. */ - static abstract class AbstractProvider implements AWSCredentialsProvider { + static abstract class AbstractProvider implements AwsCredentialsProvider { } /** * A credential provider whose constructor signature doesn't match. 
*/ protected static class ConstructorSignatureErrorProvider - implements AWSCredentialsProvider { + implements AwsCredentialsProvider { @SuppressWarnings("unused") public ConstructorSignatureErrorProvider(String str) { } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { return null; } - @Override - public void refresh() { - } } /** @@ -279,33 +305,29 @@ public void testAWSExceptionTranslation() throws Throwable { } } - protected static class AWSExceptionRaisingFactory implements AWSCredentialsProvider { + protected static class AWSExceptionRaisingFactory implements AwsCredentialsProvider { public static final String NO_AUTH = "No auth"; - public static AWSCredentialsProvider getInstance() { + public static AwsCredentialsProvider create() { throw new NoAuthWithAWSException(NO_AUTH); } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { return null; } - @Override - public void refresh() { - - } } @Test public void testFactoryWrongType() throws Throwable { expectProviderInstantiationFailure( FactoryOfWrongType.class, - CONSTRUCTOR_EXCEPTION); + InstantiationIOException.CONSTRUCTOR_EXCEPTION); } - static class FactoryOfWrongType implements AWSCredentialsProvider { + static class FactoryOfWrongType implements AwsCredentialsProvider { public static final String NO_AUTH = "No auth"; @@ -314,14 +336,10 @@ public static String getInstance() { } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { return null; } - @Override - public void refresh() { - - } } /** @@ -334,7 +352,7 @@ public void refresh() { private IOException expectProviderInstantiationFailure(String option, String expectedErrorText) throws Exception { return intercept(IOException.class, expectedErrorText, - () -> createAWSCredentialProviderSet( + () -> createAWSCredentialProviderList( TESTFILE_URI, createProviderConfiguration(option))); } @@ -385,7 +403,9 @@ private static void assertCredentialProviders( AWSCredentialProviderList list) { assertNotNull(list); List providers = list.getProviders(); - assertEquals(expectedClasses.size(), providers.size()); + Assertions.assertThat(providers) + .describedAs("providers") + .hasSize(expectedClasses.size()); for (int i = 0; i < expectedClasses.size(); ++i) { Class expectedClass = expectedClasses.get(i); @@ -493,7 +513,7 @@ public void testIOEInConstructorPropagation() throws Throwable { /** * Credential provider which raises an IOE when constructed. 
*/ - protected static class IOERaisingProvider implements AWSCredentialsProvider { + protected static class IOERaisingProvider implements AwsCredentialsProvider { public IOERaisingProvider(URI uri, Configuration conf) throws IOException { @@ -501,14 +521,10 @@ public IOERaisingProvider(URI uri, Configuration conf) } @Override - public AWSCredentials getCredentials() { + public AwsCredentials resolveCredentials() { return null; } - @Override - public void refresh() { - - } } private static final AwsCredentials EXPECTED_CREDENTIALS = @@ -538,13 +554,13 @@ public void testConcurrentAuthentication() throws Throwable { Configuration conf = createProviderConfiguration(SlowProvider.class.getName()); Path testFile = getCSVTestPath(conf); - AWSCredentialProviderList list = createAWSCredentialProviderSet(testFile.toUri(), conf); + AWSCredentialProviderList list = createAWSCredentialProviderList(testFile.toUri(), conf); SlowProvider provider = (SlowProvider) list.getProviders().get(0); ExecutorService pool = Executors.newFixedThreadPool(CONCURRENT_THREADS); - List> results = new ArrayList<>(); + List> results = new ArrayList<>(); try { assertFalse( @@ -560,15 +576,15 @@ public void testConcurrentAuthentication() throws Throwable { } for (int i = 0; i < CONCURRENT_THREADS; i++) { - results.add(pool.submit(() -> list.getCredentials())); + results.add(pool.submit(() -> list.resolveCredentials())); } - for (Future result : results) { - AWSCredentials credentials = result.get(); + for (Future result : results) { + AwsCredentials credentials = result.get(); assertEquals("Access key from credential provider", - "expectedAccessKey", credentials.getAWSAccessKeyId()); + "expectedAccessKey", credentials.accessKeyId()); assertEquals("Secret key from credential provider", - "expectedSecret", credentials.getAWSSecretKey()); + "expectedSecret", credentials.secretAccessKey()); } } finally { pool.awaitTermination(10, TimeUnit.SECONDS); @@ -608,12 +624,12 @@ public void testConcurrentAuthenticationError() throws Throwable { Configuration conf = createProviderConfiguration(ErrorProvider.class.getName()); Path testFile = getCSVTestPath(conf); - AWSCredentialProviderList list = createAWSCredentialProviderSet(testFile.toUri(), conf); + AWSCredentialProviderList list = createAWSCredentialProviderList(testFile.toUri(), conf); ErrorProvider provider = (ErrorProvider) list.getProviders().get(0); ExecutorService pool = Executors.newFixedThreadPool(CONCURRENT_THREADS); - List> results = new ArrayList<>(); + List> results = new ArrayList<>(); try { assertFalse("Provider not initialized. isInitialized should be false", @@ -627,10 +643,10 @@ public void testConcurrentAuthenticationError() throws Throwable { } for (int i = 0; i < CONCURRENT_THREADS; i++) { - results.add(pool.submit(() -> list.getCredentials())); + results.add(pool.submit(() -> list.resolveCredentials())); } - for (Future result : results) { + for (Future result : results) { interceptFuture(CredentialInitializationException.class, "expected error", result @@ -651,4 +667,37 @@ public void testConcurrentAuthenticationError() throws Throwable { "Provider initialization failed. 
getInitializationException should contain the error", provider.getInitializationException().getMessage().contains("expected error")); } + + public static final class LegacyV1CredentialProvider implements AWSCredentialsProvider { + + public LegacyV1CredentialProvider() { + } + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + + } + } + + public static final class LegacyV1CredentialProviderWithConf implements AWSCredentialsProvider { + + public LegacyV1CredentialProviderWithConf(Configuration conf) { + } + + @Override + public AWSCredentials getCredentials() { + return null; + } + + @Override + public void refresh() { + + } + } + } From 8357400d824851ff4d402a7409b6d13a93885359 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 24 Jul 2023 19:27:17 +0100 Subject: [PATCH 02/20] HADOOP-18820. Cut AWS v1 support * lots of improvements in the binding * autocloseable/closeable passdown as far as wrapped v1 * more diagnostics on provider classes, including useful toString() values * more tests * all token tests happy Change-Id: I4e6151ba6c35e280203dc6a705b6ed1fba7de551 --- .../fs/s3a/AWSCredentialProviderList.java | 24 ++- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 2 +- .../fs/s3a/SimpleAWSCredentialsProvider.java | 5 +- .../V1ToV2AwsCredentialProviderAdapter.java | 69 ++++++-- .../auth/AssumedRoleCredentialProvider.java | 24 ++- .../s3a/auth/AwsCredentialListProvider.java | 64 ++++--- .../auth/IAMInstanceCredentialsProvider.java | 7 + .../auth/delegation/SessionTokenBinding.java | 26 ++- .../tools/hadoop-aws/aws_sdk_upgrade.md | 164 ++++++++++++++++-- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 154 +++++++++++----- .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 3 +- .../hadoop/fs/s3a/auth/RoleTestUtils.java | 3 +- .../delegation/CountInvocationsProvider.java | 27 ++- .../ITestSessionDelegationTokens.java | 6 +- 14 files changed, 451 insertions(+), 127 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index 1b780c99b84c2..26700e428fd7f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -21,23 +21,23 @@ import java.io.Closeable; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.stream.Collectors; -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.util.Preconditions; - import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.fs.s3a.auth.NoAwsCredentialsException; import org.apache.hadoop.io.IOUtils; +import org.apache.hadoop.util.Preconditions; import software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsCredentials; @@ -111,7 +111,7 @@ public AWSCredentialProviderList( public AWSCredentialProviderList(final String name, final 
AwsCredentialsProvider... providerArgs) { setName(name); - providers.forEach(this::add); + Collections.addAll(providers, providerArgs); } /** @@ -134,7 +134,6 @@ public void add(AwsCredentialsProvider provider) { providers.add(provider); } - /** * Add all providers from another list to this one. * @param other the other list. @@ -143,6 +142,13 @@ public void addAll(AWSCredentialProviderList other) { providers.addAll(other.providers); } + /** + * Was an implementation of the v1 refresh; now just + * a no-op. + */ + @Deprecated + public void refresh() { + } /** * Iterate through the list of providers, to find one with credentials. @@ -245,9 +251,11 @@ public String listProviderNames() { */ @Override public String toString() { - return "AWSCredentialProviderList[" + - name + - "refcount= " + refCount.get() + ": [" + + return "AWSCredentialProviderList" + + " name=" + name + + "; refcount= " + refCount.get() + + "; size="+ providers.size() + + ": [" + StringUtils.join(providers, ", ") + ']' + (lastProvider != null ? (" last provider: " + lastProvider) : ""); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index e2cad4d1a2689..015f3fb5cf5e0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -571,7 +571,7 @@ public static InstanceT getInstanceFromReflection(Class instanceC return getInstanceFromReflection(className,conf, uri ,interfaceImplemented, methodName,configKey); } - /*** + /** * Creates an instance of a class using reflection. The * class must implement one of the following means of construction, which are * attempted in order: diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java index 13008e8d73e41..186698b5f1848 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java @@ -88,7 +88,10 @@ public AwsCredentials resolveCredentials() { @Override public String toString() { - return getClass().getSimpleName(); + return "SimpleAWSCredentialsProvider{" + + "accessKey.empty=" + accessKey.isEmpty() + + ", secretKey.empty'" + secretKey.isEmpty() + + '}'; } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java index a57f73b265555..bcf0f80d8ae79 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -18,14 +18,18 @@ package org.apache.hadoop.fs.s3a.adapter; +import java.io.Closeable; import java.io.IOException; import java.net.URI; import javax.annotation.Nullable; +import com.amazonaws.SdkClientException; import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import com.amazonaws.auth.AWSSessionCredentials; import com.amazonaws.auth.AnonymousAWSCredentials; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import 
software.amazon.awssdk.auth.credentials.AnonymousCredentialsProvider; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; @@ -33,7 +37,10 @@ import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; +import org.apache.hadoop.fs.s3a.CredentialInitializationException; import org.apache.hadoop.fs.s3a.S3AUtils; +import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; @@ -41,8 +48,14 @@ /** * Adapts a V1 {@link AWSCredentialsProvider} to the V2 {@link AwsCredentialsProvider} interface. */ -public final class V1ToV2AwsCredentialProviderAdapter implements AwsCredentialsProvider { +public final class V1ToV2AwsCredentialProviderAdapter + implements AwsCredentialsProvider, Closeable { + private static final Logger LOG = LoggerFactory.getLogger( + V1ToV2AwsCredentialProviderAdapter.class); + /** + * The V1 credential provider constructed. + */ private final AWSCredentialsProvider v1CredentialsProvider; @@ -50,9 +63,45 @@ private V1ToV2AwsCredentialProviderAdapter(AWSCredentialsProvider v1CredentialsP this.v1CredentialsProvider = requireNonNull(v1CredentialsProvider); } + + /** + * Collect v1 credentials and convert to v2. + * @return v2 credentials + * @throws CredentialInitializationException if the inner retrieval raised an exception + */ @Override public AwsCredentials resolveCredentials() { - AWSCredentials toAdapt = v1CredentialsProvider.getCredentials(); + try { + // get the wrapped credentials + AWSCredentials toAdapt = v1CredentialsProvider.getCredentials(); + return convertToV2Credentials(toAdapt); + } catch (SdkClientException e) { + // wrap with a v2 exception so that code which adds a try/catch for v2 sdk exceptions + // gets a compatible exception. + throw new CredentialInitializationException(e.toString(), e); + } + } + + /** + * Close the wrapped provider if it implements Closeable/AutoCloseable. + * @throws IOException failure + */ + @Override + public void close() throws IOException { + if (v1CredentialsProvider instanceof Closeable) { + ((Closeable) v1CredentialsProvider).close(); + } else if (v1CredentialsProvider instanceof AutoCloseable) { + S3AUtils.closeAutocloseables(LOG, (AutoCloseable)v1CredentialsProvider); + } + } + + /** + * Convert v1 credentials to v2, including support for session and anonymous + * credentials. + * @param toAdapt credentials to adapt. + * @return v2 credentials. + */ + static AwsCredentials convertToV2Credentials(final AWSCredentials toAdapt) { if (toAdapt instanceof AWSSessionCredentials) { return AwsSessionCredentials.create(toAdapt.getAWSAccessKeyId(), toAdapt.getAWSSecretKey(), @@ -98,19 +147,19 @@ static AwsCredentialsProvider create(AWSCredentialsProvider v1CredentialsProvide * @param className classname * @param uri URI of the FS * @return the instantiated class - * @throws IOException on any instantiation failure. + * @throws InstantiationIOException on construction and instantiation failures, + * including v1 SDK exceptions. + * @throws IOException if raised by a constructor/factory method. 
*/ static AwsCredentialsProvider create( Configuration conf, String className, - @Nullable URI uri) throws IOException { - - - final AWSCredentialsProvider instance = - S3AUtils.getInstanceFromReflection(className, conf, uri, AWSCredentialsProvider.class, - "getInstance", AWS_CREDENTIALS_PROVIDER); - return create(instance); + @Nullable URI uri) throws InstantiationIOException, IOException { + final AWSCredentialsProvider instance = + S3AUtils.getInstanceFromReflection(className, conf, uri, AWSCredentialsProvider.class, + "getInstance", AWS_CREDENTIALS_PROVIDER); + return create(instance); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index 3517fabb9579e..2513c2fdb168b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -35,6 +35,7 @@ import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; import software.amazon.awssdk.services.sts.model.StsException; import org.apache.hadoop.classification.VisibleForTesting; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.util.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -106,7 +107,7 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) arn = conf.getTrimmed(ASSUMED_ROLE_ARN, ""); if (StringUtils.isEmpty(arn)) { - throw new IOException(E_NO_ROLE); + throw new PathIOException(String.valueOf(fsUri), E_NO_ROLE); } // build up the base provider @@ -115,8 +116,8 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) Arrays.asList( SimpleAWSCredentialsProvider.class, EnvironmentVariableCredentialsProvider.class), - Sets.newHashSet(this.getClass())); - LOG.debug("Credentials to obtain role credentials: {}", credentialsToSTS); + Sets.newHashSet(getClass())); + LOG.debug("Credentials used to obtain role credentials: {}", credentialsToSTS); // then the STS binding sessionName = conf.getTrimmed(ASSUMED_ROLE_SESSION_NAME, @@ -156,9 +157,6 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) // need to retry invoker = new Invoker(new S3ARetryPolicy(conf), this::operationRetried); - // and force in a fail-fast check just to keep the stack traces less - // convoluted - resolveCredentials(); } /** @@ -170,7 +168,7 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, Configuration conf) @Retries.RetryRaw public AwsCredentials resolveCredentials() { try { - return invoker.retryUntranslated("getCredentials", + return invoker.retryUntranslated("resolveCredentials", true, stsProvider::resolveCredentials); } catch (IOException e) { @@ -198,13 +196,11 @@ public void close() { @Override public String toString() { - final StringBuilder sb = new StringBuilder( - "AssumedRoleCredentialProvider{"); - sb.append("role='").append(arn).append('\''); - sb.append(", session'").append(sessionName).append('\''); - sb.append(", duration=").append(duration); - sb.append('}'); - return sb.toString(); + String sb = "AssumedRoleCredentialProvider{" + "role='" + arn + '\'' + + ", session'" + sessionName + '\'' + + ", duration=" + duration + + '}'; + return sb; } /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java index 420d13a9e5911..2a644fd385ef1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java @@ -38,6 +38,7 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider; import org.apache.hadoop.fs.s3a.Constants; @@ -78,29 +79,35 @@ public final class AwsCredentialListProvider { public static final List> STANDARD_AWS_PROVIDERS = Collections.unmodifiableList( Arrays.asList( - TemporaryAWSCredentialsProvider.class, - SimpleAWSCredentialsProvider.class, EnvironmentVariableCredentialsProvider.class, - IAMInstanceCredentialsProvider.class)); + IAMInstanceCredentialsProvider.class, + SimpleAWSCredentialsProvider.class, + TemporaryAWSCredentialsProvider.class)); - public static final String V1_ENVIRONMENT_CREDENTIALS = - "com.amazonaws.auth.EnvironmentVariableCredentialsProvider"; + public static final String ANONYMOUS_CREDENTIALS_V1 = + "com.amazonaws.auth.AnonymousAWSCredentials"; - public static final String V1_EC2_CONTAINER_CREDENTIALS = + public static final String EC2_CONTAINER_CREDENTIALS_V1 = "com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper"; - public static final String V1_EC2_IAM_CREDENTIALS = + public static final String EC2_IAM_CREDENTIALS_V1 = "com.amazonaws.auth.InstanceProfileCredentialsProvider"; - public static final String V1_ANONYMOUS_CREDENTIALS = - "com.amazonaws.auth.AnonymousAWSCredentials"; - - public static final String V2_EC2_IAM_CREDENTIALS = + public static final String EC2_IAM_CREDENTIALS_V2 = IAMInstanceCredentialsProvider.class.getName(); - public static final String V2_ENVIRONMENT_CREDENTIALS = + public static final String ENVIRONMENT_CREDENTIALS_V1 = + "com.amazonaws.auth.EnvironmentVariableCredentialsProvider"; + + public static final String ENVIRONMENT_CREDENTIALS_V2 = EnvironmentVariableCredentialsProvider.class.getName(); + /** + * Private map of v1 to v2 credential provider name mapping + */ + private static final Map V1_V2_CREDENTIAL_PROVIDER_MAP = initCredentialProvidersMap(); + + /** * Create the AWS credentials from the providers, the URI and * the key {@link Constants#AWS_CREDENTIALS_PROVIDER} in the configuration. 
@@ -154,14 +161,13 @@ private static Collection loadAWSProviderClasses(Configuration conf, private static Map initCredentialProvidersMap() { Map v1v2CredentialProviderMap = new HashMap<>(); - v1v2CredentialProviderMap.put(V1_ENVIRONMENT_CREDENTIALS, - V2_ENVIRONMENT_CREDENTIALS); - v1v2CredentialProviderMap.put(V1_EC2_CONTAINER_CREDENTIALS, - V2_EC2_IAM_CREDENTIALS); - v1v2CredentialProviderMap.put(V1_EC2_IAM_CREDENTIALS, - V2_EC2_IAM_CREDENTIALS); - v1v2CredentialProviderMap.put(V1_ANONYMOUS_CREDENTIALS, + v1v2CredentialProviderMap.put(ANONYMOUS_CREDENTIALS_V1, AnonymousAWSCredentialsProvider.NAME); + v1v2CredentialProviderMap.put(EC2_CONTAINER_CREDENTIALS_V1, + EC2_IAM_CREDENTIALS_V2); + v1v2CredentialProviderMap.put(EC2_IAM_CREDENTIALS_V1, + EC2_IAM_CREDENTIALS_V2); + v1v2CredentialProviderMap.put(ENVIRONMENT_CREDENTIALS_V1, ENVIRONMENT_CREDENTIALS_V2); return v1v2CredentialProviderMap; } @@ -189,7 +195,10 @@ public static AWSCredentialProviderList buildAWSProviderList( key, defaultValues.toArray(new Class[defaultValues.size()])); - Map v1v2CredentialProviderMap = initCredentialProvidersMap(); + Map v1v2CredentialProviderMap = V1_V2_CREDENTIAL_PROVIDER_MAP; + final Set forbiddenClassnames = + forbidden.stream().map(c -> c.getName()).collect(Collectors.toSet()); + // iterate through, checking for forbidden values and then instantiating // each provider @@ -205,13 +214,14 @@ public static AWSCredentialProviderList buildAWSProviderList( } // now scan the forbidden list. doing this after any mappings ensures the v1 names // are also blocked - if (forbidden.contains(className)) { - throw new IOException(E_FORBIDDEN_AWS_PROVIDER - + " in option " + key + ": " + className); + if (forbiddenClassnames.contains(className)) { + throw new PathIOException(String.valueOf(binding), + E_FORBIDDEN_AWS_PROVIDER + " in option " + key + ": " + className); } + AwsCredentialsProvider provider; try { - providers.add(createAWSV2CredentialProvider(conf, className, binding)); + provider = createAWSV2CredentialProvider(conf, className, binding); } catch (InstantiationIOException e) { // failed to create a v2; try to see if it is a v1 if (e.getKind() == InstantiationIOException.Kind.IsNotImplementation) { @@ -220,11 +230,10 @@ public static AWSCredentialProviderList buildAWSProviderList( LOG.debug("Failed to create {} as v2 credentials, trying to instantiate as v1", className); try { - final AwsCredentialsProvider provider = + provider = AwsV1BindingSupport.createAWSV1CredentialProvider(conf, className, binding); LOG_REMAPPED_ENTRY.warn("Credentials option {} contains AWS v1 SDK entry {}", key, className); - providers.add(provider); } catch (InstantiationIOException ex) { // if it is something other than non-implementation, throw. 
// that way, non-impl messages are about v2 not v1 in the error @@ -245,8 +254,9 @@ public static AWSCredentialProviderList buildAWSProviderList( throw e; } + LOG.debug("From provider class {} created Aws provider {}", className, provider); } - + providers.add(provider); } return providers; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java index f505cfcab5d4f..a28a62fd8ebba 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java @@ -90,4 +90,11 @@ private AwsCredentials getCredentials() { public void close() throws IOException { // no-op. } + + @Override + public String toString() { + return "IAMInstanceCredentialsProvider{" + + "containerCredentialsProvider=" + containerCredentialsProvider + + '}'; + } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 4b9fd517b264b..9a14894d05a27 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -102,7 +102,8 @@ public class SessionTokenBinding extends AbstractDelegationTokenBinding { private boolean hasSessionCreds; /** - * The auth chain for the parent options. + * The parent authentication chain: that used to request + * session/role credentials when deployed unbonded. */ private AWSCredentialProviderList parentAuthChain; @@ -161,12 +162,14 @@ protected void serviceStart() throws Exception { DEFAULT_DELEGATION_TOKEN_REGION); // create the provider set for session credentials. - parentAuthChain = buildAWSProviderList( + final AWSCredentialProviderList chain = buildAWSProviderList( getCanonicalUri(), conf, AWS_CREDENTIALS_PROVIDER, STANDARD_AWS_PROVIDERS, new HashSet<>()); + LOG.debug("Setting parent authentication chain to {}", chain); + setParentAuthChain(chain); } @Override @@ -189,7 +192,7 @@ protected void serviceStop() throws Exception { public AWSCredentialProviderList deployUnbonded() throws IOException { requireServiceStarted(); - return parentAuthChain; + return getParentAuthChain(); } /** @@ -291,7 +294,7 @@ private synchronized Optional maybeInitSTS() // throw this. 
final AwsCredentials parentCredentials = once("get credentials", "", - () -> parentAuthChain.resolveCredentials()); + () -> getParentAuthChain().resolveCredentials()); hasSessionCreds = parentCredentials instanceof AwsSessionCredentials; if (!hasSessionCreds) { @@ -300,7 +303,7 @@ private synchronized Optional maybeInitSTS() invoker = new Invoker(new S3ARetryPolicy(conf), LOG_EVENT); StsClient tokenService = - STSClientFactory.builder(parentAuthChain, + STSClientFactory.builder(getParentAuthChain(), conf, endpoint, region, @@ -371,7 +374,7 @@ public SessionTokenIdentifier createTokenIdentifier( } origin += " " + CREDENTIALS_CONVERTED_TO_DELEGATION_TOKEN; final AwsCredentials awsCredentials - = parentAuthChain.resolveCredentials(); + = getParentAuthChain().resolveCredentials(); if (awsCredentials instanceof AwsSessionCredentials) { marshalledCredentials = fromAWSCredentials( (AwsSessionCredentials) awsCredentials); @@ -421,4 +424,15 @@ protected void setTokenIdentifier(Optional tokenIdentifier) { this.tokenIdentifier = tokenIdentifier; } + + /** + * The auth chain for the parent options. + */ + protected AWSCredentialProviderList getParentAuthChain() { + return parentAuthChain; + } + + protected void setParentAuthChain(AWSCredentialProviderList parentAuthChain) { + this.parentAuthChain = parentAuthChain; + } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index e649a8d76d539..a08e6a9eb859d 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -20,26 +20,113 @@ This work is tracked in [HADOOP-18073](https://issues.apache.org/jira/browse/HAD ## Why the upgrade? - Moving to SDK V2 will provide performance benefits. -For example, the [transfer manager for SDKV2](https://aws.amazon.com/blogs/developer/introducing-amazon-s3-transfer-manager-in-the-aws-sdk-for-java-2-x/) +For example, the [transfer manager for SDK V2](https://aws.amazon.com/blogs/developer/introducing-amazon-s3-transfer-manager-in-the-aws-sdk-for-java-2-x/) is built using java bindings of the AWS Common Runtime S3 client (https://github.com/awslabs/aws-crt-java) (CRT). CRT is a set of packages written in C, designed for maximising performance when interacting with AWS services such as S3. +- The V1 SDK is essentially in maintenance mode. - New features such as [additional checksum algorithms](https://aws.amazon.com/blogs/aws/new-additional-checksum-algorithms-for-amazon-s3/) -which S3A will benefit from are not available in SDKV1. +which S3A will benefit from are not available in SDK V1. ## What's changing? The [SDK V2](https://github.com/aws/aws-sdk-java-v2) for S3 is very different from [SDK V1](https://github.com/aws/aws-sdk-java), and brings breaking changes for S3A. -A complete list of the changes can be found in the [Changelog](https://github.com/aws/aws-sdk-java-v2/blob/master/docs/LaunchChangelog.md#41-s3-changes). +A complete list of the changes can be found in the +[Changelog](https://github.com/aws/aws-sdk-java-v2/blob/master/docs/LaunchChangelog.md#41-s3-changes). + +# S3A integration changes. + +## Deployment Changes + + +### Packaging: `aws-java-sdk-bundle-1.12.x.jar` becomes `bundle-2.x.y.jar` + +As the module name is lost, in hadoop releases a large JAR file with +the name "bundle" is now part of the distribution. +This is the AWS v2 SDK shaded artifact. 
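+
+As noted below, v1 credential providers are only supported when the v1 SDK
+is also on the classpath. A deployment can probe for this by class name,
+which is the same check the S3A adapter code performs internally.
+A minimal sketch (the probe class and method names here are illustrative only):
+
+```java
+/** Illustrative classpath probe for the AWS v1 SDK; not part of the S3A API. */
+public final class V1SdkProbe {
+
+  private V1SdkProbe() {
+  }
+
+  /** @return true if the v1 credential provider interface can be loaded. */
+  public static boolean isV1SdkOnClasspath() {
+    try {
+      // the v1 interface which the S3A v1-to-v2 adapter binds to
+      V1SdkProbe.class.getClassLoader()
+          .loadClass("com.amazonaws.auth.AWSCredentialsProvider");
+      return true;
+    } catch (ClassNotFoundException e) {
+      return false;
+    }
+  }
+}
+```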
+
+The new and old SDKs can co-exist; the only place that the hadoop code
+may still use the original SDK is when a non-standard V1 AWS credential
+provider is declared.
+
+Any deployment of the S3A connector must include this JAR or
+the subset of non-shaded aws- JARs needed for communication
+with S3 and any other services used.
+As before: the exact set of dependencies used by the S3A connector
+is neither defined nor comes with any commitments of stability
+or compatibility of dependent libraries.
+
+### Configuration Option Changes
+
+### Credential Providers declared in `fs.s3a.aws.credentials.provider`
+
+V1 Credential providers are *only* supported when the V1 SDK is on the classpath.
+
+The standard set of v1 credential providers used in hadoop deployments are
+automatically remapped to v2 equivalents,
+while the stable hadoop providers have been upgraded in place; their names
+are unchanged.
+As a result, standard cluster configurations should seamlessly upgrade.
+
+| v1 Credential Provider | Remapped V2 substitute |
+|-------------------------------------------------------------|----------------------------------------------------------------------------------|
+| `com.amazonaws.auth.AnonymousAWSCredentials` | `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` |
+| `com.amazonaws.auth.EnvironmentVariableCredentialsProvider` | `software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` |
+| `com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper` | `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |
+| `com.amazonaws.auth.InstanceProfileCredentialsProvider` | `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |
+
+
+There are a limited number of trouble spots here:
+
+#### Other `com.amazonaws.auth.` AWS providers
+
+There should be equivalents in the new SDK, but as well as being renamed
+they are likely to have moved to different factory/builder mechanisms.
+Identify the changed classes and use their
+names in the `fs.s3a.aws.credentials.provider` option.
+
+If a v2 equivalent is not found then, provided the v1 SDK is added to the classpath,
+it should still be possible to use the existing classes.
+
+
+#### Private/third-party credential providers
+
+Provided the v1 SDK is added to the classpath,
+it should still be possible to use the existing classes.
+
+Adding a v2 equivalent is the recommended long-term solution.
+
+#### Private subclasses of the Hadoop credential providers
+
+Because all the standard hadoop credential providers have been upgraded,
+any subclasses of these are not going to link or work.
+
+These will need to be upgraded in source, as covered below.
+
+
+## Source code/binary integration changes

 The major changes and how this affects S3A are listed below.

-### Package Change
+### SDK API Package Change

-Package names have changed, all classes in SDK V2 are under `software.amazon.awssdk`, SDK V1 classes
+* Package names have changed, all classes in SDK V2 are under `software.amazon.awssdk`, SDK V1 classes
 were under `com.amazonaws`.
+* There is no interoperability between them.
+* All classnames are different, often in very subtle ways. It is possible to use both in the same
+  class, as is done in the package `org.apache.hadoop.fs.s3a.adapter`.
+* All the core message classes are now automatically generated from a JSON protocol description.
+* All the getter methods have been renamed.
+* All classes are constructed via builder methods.
+* Message classes are no longer Java `Serializable`.
+
+Most of these changes simply add what will feel to be gratuitous migration effort;
+the removal of the `Serializable` nature from all message response classes can
+potentially break applications, such as anything passing them between Spark workers.
+See AWS SDK v2 issue [Simplify Modeled Message Marshalling #82](https://github.com/aws/aws-sdk-java-v2/issues/82),
+note that it was filed in 2017, then implement your own workaround.

 ### Credential Providers

@@ -49,14 +136,63 @@ has been replaced by [software.amazon.awssdk.auth.credentials.AwsCredentialsProv
 changed.

 The change in interface will mean that custom credential providers will need to be updated to now
-implement `AwsCredentialsProvider` instead of `AWSCredentialProvider`.
+implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` instead of
+`com.amazonaws.auth.AWSCredentialsProvider`.
+
+#### v1 `AWSCredentialsProvider` interface
+
+Note how the interface begins with the capitalized "AWS" acronym.
+The v2 interface starts with "Aws". This is a very subtle change
+for developers to spot.
+Compilers _will_ detect and report the type mismatch.
+
+```java
+public interface AWSCredentialsProvider {
+
+  public AWSCredentials getCredentials();
+
+  public void refresh();
+
+}
+
+```
+The interface binding also supported a factory method, `AWSCredentialsProvider getInstance()`,
+which, if available, would be invoked in preference to using any constructor.
+
+#### v2 `AwsCredentialsProvider` interface
+
+The v2 interface starts with "Aws" rather than the capitalized "AWS" acronym.
+Compilers will detect and report the type mismatch, but it is not
+immediately obvious to developers migrating existing code.
+
+```java
+public interface AwsCredentialsProvider {
+
+  AwsCredentials resolveCredentials();
+
+}
+```
+
+1. There is no `refresh()` method any more.
+2. `getCredentials()` has become `resolveCredentials()`.
+3. There is now the expectation in the SDK that credential resolution/lookup etc. will be
+   performed in `resolveCredentials()`.
+
+A sketch of a provider migrated to the v2 interface is shown below,
+after the S3Client section.
+
-
-Due to change in class names, references to SDK V1 credential providers
-in `fs.s3a.aws.credentials.provider` will need to be updated to reference V2 providers.

 ### Delegation Tokens

-Custom credential providers used in delegation token binding classes will also need to be updated.
+1. Custom credential providers used in delegation token binding classes will need to be updated;
+2. The return type from delegation token binding has changed to support more class
+   instances being returned in future.
+
+`AWSCredentialProviderList` has been upgraded to the V2 API.
+* It still retains a `refresh()` method but this is now a deprecated no-op.
+* It is still `Closeable`; its `close()` method iterates through all entries in
+the list; if they are `Closeable` or `AutoCloseable` then their `close()` method is invoked.
+* Accordingly, providers may still perform background refreshes in separate threads;
+  the S3A client will close its provider list when the filesystem itself is closed.

 ### AmazonS3 replaced by S3Client

@@ -65,7 +201,7 @@ The s3 client is an instance of `S3Client` in V2 rather than `AmazonS3`.

 For this reason, the `S3ClientFactory` will be deprecated and replaced by one that creates a V2
 `S3Client`.

-The `getAmazonS3ClientForTesting()` method will also be updated to return the `S3Client`.
+The `getAmazonS3ClientForTesting()` method has been updated to return the `S3Client`.
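+
+#### Example: a migrated credential provider
+
+To make the interface change concrete, here is a minimal sketch of a custom
+credential provider written against the v2 interface. This is illustrative
+only: the class name and the use of system properties as the credential
+source are invented for the example and are not part of any Hadoop or AWS API.
+
+```java
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+
+/** Illustrative v2 credential provider; names are examples only. */
+public final class ExampleV2CredentialsProvider
+    implements AwsCredentialsProvider {
+
+  @Override
+  public AwsCredentials resolveCredentials() {
+    // all lookup/refresh work now happens here; there is no refresh() method
+    final String key = System.getProperty("example.access.key");
+    final String secret = System.getProperty("example.secret.key");
+    return AwsBasicCredentials.create(key, secret);
+  }
+}
+```
+
+The v1 version of the same class would have implemented `getCredentials()`
+and an (often empty) `refresh()` method instead.
+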
### Signers @@ -74,3 +210,11 @@ has been replaced by [software.amazon.awssdk.core.signer.Signer](https://github. The change in signers will mean the custom signers will need to be updated to implement the new interface. + +### S3A Auditing Extensions. + +The callbacks from the SDK have all changed, as has +the interface `org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks` + +Examine the interface and associated implementations to +see how to migrate. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index c5281dbf6968e..ceb8f188ab91d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -36,34 +36,34 @@ import com.amazonaws.auth.AWSCredentials; import com.amazonaws.auth.AWSCredentialsProvider; import org.assertj.core.api.Assertions; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; -import org.junit.Rule; import org.junit.Test; -import org.junit.rules.ExpectedException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.adapter.V1ToV2AwsCredentialProviderAdapter; import org.apache.hadoop.fs.s3a.auth.AbstractSessionCredentialsProvider; import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider; +import org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider; import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.util.Sets; - import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.V1_ANONYMOUS_CREDENTIALS; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.V1_EC2_CONTAINER_CREDENTIALS; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.V1_ENVIRONMENT_CREDENTIALS; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ANONYMOUS_CREDENTIALS_V1; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.EC2_CONTAINER_CREDENTIALS_V1; +import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ENVIRONMENT_CREDENTIALS_V1; import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderList; import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.DOES_NOT_IMPLEMENT; @@ -82,8 +82,7 @@ public class TestS3AAWSCredentialsProvider { private static final URI TESTFILE_URI = new Path( DEFAULT_CSVTEST_FILE).toUri(); - @Rule - public ExpectedException 
exception = ExpectedException.none(); + private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class); @Test public void testProviderWrongClass() throws Exception { @@ -170,9 +169,9 @@ public void testV1V2Mapping() throws Exception { EnvironmentVariableCredentialsProvider.class); Configuration conf = createProviderConfiguration(buildClassList( - V1_EC2_CONTAINER_CREDENTIALS, - V1_ANONYMOUS_CREDENTIALS, - V1_ENVIRONMENT_CREDENTIALS)); + EC2_CONTAINER_CREDENTIALS_V1, + ANONYMOUS_CREDENTIALS_V1, + ENVIRONMENT_CREDENTIALS_V1)); AWSCredentialProviderList list1 = createAWSCredentialProviderList( uri1, conf); assertCredentialProviders(expectedClasses, list1); @@ -241,6 +240,30 @@ public void testFallbackToDefaults() throws Throwable { EnvironmentVariableCredentialsProvider.class), Sets.newHashSet()); assertTrue("empty credentials", credentials.size() > 0); + } + + @Test + public void testProviderConstructor() throws Throwable { + final AWSCredentialProviderList list = new AWSCredentialProviderList("name", + new AnonymousAWSCredentialsProvider(), + new ErrorProvider(TESTFILE_URI, new Configuration())); + Assertions.assertThat(list.getProviders()) + .describedAs("provider list in %s", list) + .hasSize(2); + final AwsCredentials credentials = list.resolveCredentials(); + Assertions.assertThat(credentials) + .isInstanceOf(AwsBasicCredentials.class); + assertCredentialResolution(credentials, null, null); + } + + public static void assertCredentialResolution(AwsCredentials creds, String key, String secret) { + Assertions.assertThat(creds.accessKeyId()) + .describedAs("access key of %s", creds) + .isEqualTo(key); + Assertions.assertThat(creds.secretAccessKey()) + .describedAs("secret key of %s", creds) + .isEqualTo(secret); + } @@ -258,41 +281,35 @@ private String buildClassList(String... classes) { * A credential provider declared as abstract, so it cannot be instantiated. */ static abstract class AbstractProvider implements AwsCredentialsProvider { + + @Override + public AwsCredentials resolveCredentials() { + return null; + } } /** * A credential provider whose constructor signature doesn't match. */ protected static class ConstructorSignatureErrorProvider - implements AwsCredentialsProvider { + extends AbstractProvider { @SuppressWarnings("unused") public ConstructorSignatureErrorProvider(String str) { } - - @Override - public AwsCredentials resolveCredentials() { - return null; - } - } /** * A credential provider whose constructor raises an NPE. 
*/ protected static class ConstructorFailureProvider - implements AwsCredentialsProvider { + extends AbstractProvider { @SuppressWarnings("unused") public ConstructorFailureProvider() { throw new NullPointerException("oops"); } - @Override - public AwsCredentials resolveCredentials() { - return null; - } - } @Test @@ -305,19 +322,13 @@ public void testAWSExceptionTranslation() throws Throwable { } } - protected static class AWSExceptionRaisingFactory implements AwsCredentialsProvider { + protected static class AWSExceptionRaisingFactory extends AbstractProvider { public static final String NO_AUTH = "No auth"; public static AwsCredentialsProvider create() { throw new NoAuthWithAWSException(NO_AUTH); } - - @Override - public AwsCredentials resolveCredentials() { - return null; - } - } @Test @@ -327,7 +338,7 @@ public void testFactoryWrongType() throws Throwable { InstantiationIOException.CONSTRUCTOR_EXCEPTION); } - static class FactoryOfWrongType implements AwsCredentialsProvider { + static class FactoryOfWrongType extends AbstractProvider { public static final String NO_AUTH = "No auth"; @@ -513,18 +524,13 @@ public void testIOEInConstructorPropagation() throws Throwable { /** * Credential provider which raises an IOE when constructed. */ - protected static class IOERaisingProvider implements AwsCredentialsProvider { + protected static class IOERaisingProvider extends AbstractProvider { public IOERaisingProvider(URI uri, Configuration conf) throws IOException { throw new InterruptedIOException("expected"); } - @Override - public AwsCredentials resolveCredentials() { - return null; - } - } private static final AwsCredentials EXPECTED_CREDENTIALS = @@ -668,7 +674,7 @@ public void testConcurrentAuthenticationError() throws Throwable { provider.getInitializationException().getMessage().contains("expected error")); } - public static final class LegacyV1CredentialProvider implements AWSCredentialsProvider { + public static class LegacyV1CredentialProvider implements AWSCredentialsProvider { public LegacyV1CredentialProvider() { } @@ -684,19 +690,77 @@ public void refresh() { } } - public static final class LegacyV1CredentialProviderWithConf implements AWSCredentialsProvider { + /** + * V1 credentials with a configuration constructor. + */ + public static final class LegacyV1CredentialProviderWithConf + extends LegacyV1CredentialProvider { public LegacyV1CredentialProviderWithConf(Configuration conf) { } + } - @Override - public AWSCredentials getCredentials() { - return null; + /** + * V1 Credentials whose factory method raises ClassNotFoundException. + * Expect this to fail rather than trigger recursive recovery; + * exception will be wrapped with something intended to be informative. + */ + @Test + public void testV1InstantiationFailurePropagation() throws Throwable { + InstantiationIOException expected = intercept(InstantiationIOException.class, + "simulated CNFE", + () -> createAWSCredentialProviderList( + TESTFILE_URI, + createProviderConfiguration(V1CredentialProviderDoesNotInstantiate.class.getName()))); + // print for the curious + LOG.info("{}", expected.toString()); + } + + + /** + * V1 credentials which raises an instantiation exception. 
+ */ + public static final class V1CredentialProviderDoesNotInstantiate + extends LegacyV1CredentialProvider { + + private V1CredentialProviderDoesNotInstantiate() { } - @Override - public void refresh() { + public static AWSCredentialsProvider getInstance() throws ClassNotFoundException { + throw new ClassNotFoundException("simulated CNFE"); + } + } + + /** + * V2 Credentials whose factory method raises ClassNotFoundException. + * This will fall back to an attempted v1 load which will fail because it + * is the wrong type. + * The exception raised will be from the v2 instantiation attempt, + * not the v1 attempt. + */ + @Test + public void testV2ClassNotFound() throws Throwable { + InstantiationIOException expected = intercept(InstantiationIOException.class, + "simulated v2 CNFE", + () -> createAWSCredentialProviderList( + TESTFILE_URI, + createProviderConfiguration(V2CredentialProviderDoesNotInstantiate.class.getName()))); + // print for the curious + LOG.info("{}", expected.toString()); + } + + /** + * V2 credentials which raises an instantiation exception in + * the factory method. + */ + public static final class V2CredentialProviderDoesNotInstantiate + extends AbstractProvider { + + private V2CredentialProviderDoesNotInstantiate() { + } + public static AwsCredentialsProvider create() throws ClassNotFoundException { + throw new ClassNotFoundException("simulated v2 CNFE"); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 1c6e00655acb2..5a854b4d4b8bc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -40,6 +40,7 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.AWSBadRequestException; import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; @@ -241,7 +242,7 @@ public void testAssumeRoleCannotAuthAssumedRole() throws Exception { conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME); expectFileSystemCreateFailure(conf, - IOException.class, + PathIOException.class, E_FORBIDDEN_AWS_PROVIDER); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java index 186887d745bfc..86c9bb71cdad6 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java @@ -153,7 +153,8 @@ public static Configuration newAssumedRoleConfig( removeBaseAndBucketOverrides(conf, DELEGATION_TOKEN_BINDING, ASSUMED_ROLE_ARN, - AWS_CREDENTIALS_PROVIDER); + AWS_CREDENTIALS_PROVIDER, + ASSUMED_ROLE_SESSION_DURATION); conf.set(AWS_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME); conf.set(ASSUMED_ROLE_ARN, roleARN); conf.set(ASSUMED_ROLE_SESSION_NAME, "test"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java index 4ee79e7220afc..f9bc63e716ba2 100644 --- 
a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java @@ -20,9 +20,12 @@ import java.util.concurrent.atomic.AtomicLong; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; +import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.CredentialInitializationException; /** @@ -31,14 +34,34 @@ public class CountInvocationsProvider implements AwsCredentialsProvider { + private static final Logger LOG = LoggerFactory.getLogger( + CountInvocationsProvider.class); + public static final String NAME = CountInvocationsProvider.class.getName(); public static final AtomicLong COUNTER = new AtomicLong(0); + private final AtomicLong instanceCounter = new AtomicLong(0); @Override public AwsCredentials resolveCredentials() { - COUNTER.incrementAndGet(); - throw new CredentialInitializationException("no credentials"); + final long global = COUNTER.incrementAndGet(); + final long local = instanceCounter.incrementAndGet(); + final String msg = + String.format("counter with global count %d and local count %d", global, local); + LOG.debug("resolving credentials from {}", msg); + throw new CredentialInitializationException("no credentials from " + msg); + } + + public long getInstanceCounter() { + return instanceCounter.get(); + } + + @Override + public String toString() { + return "CountInvocationsProvider{" + + "instanceCounter=" + instanceCounter.get() + + "; global counter=" + COUNTER.get() + + '}'; } public static long getInvocationCount() { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java index 7f13cb3a4d161..efc775966859d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationTokens.java @@ -186,11 +186,15 @@ public void testCreateAndUseDT() throws Throwable { final MarshalledCredentials creds; try(S3ADelegationTokens dt2 = instantiateDTSupport(getConfiguration())) { dt2.start(); + // first creds are good + dt2.getCredentialProviders().resolveCredentials(); + + // reset to the original dt dt2.resetTokenBindingToDT(originalDT); final AwsSessionCredentials awsSessionCreds = verifySessionCredentials( - dt2.getCredentialProviders().resolveCredentials()); + dt2.getCredentialProviders().resolveCredentials()); final MarshalledCredentials origCreds = fromAWSCredentials( awsSessionCreds); From b282db327e4810db5e6928f8f73b995c61aa4133 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 25 Jul 2023 18:01:09 +0100 Subject: [PATCH 03/20] HADOOP-18820. ongoing dev * split v2 and v1 credential test suites * rename AwsCredentialListProvider to CredentialProviderListFactory as it was too close to AWSCredentialProviderList. * yetus feedback * review feedback * more use of InstantiationException over more generic IOEs. Side issue: wondering if we should cut all the S3xLoginHelper stuff? been a long time since s3n existed and we've had to tell people to stop it. 
Change-Id: I405dc1793b39d9ee0a2f47c0600c2a3040154d75 --- hadoop-tools/hadoop-aws/pom.xml | 2 +- .../fs/s3a/AWSCredentialProviderList.java | 3 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 20 +- .../org/apache/hadoop/fs/s3a/Tristate.java | 2 +- .../fs/s3a/adapter/AwsV1BindingSupport.java | 70 +++--- .../V1ToV2AwsCredentialProviderAdapter.java | 14 +- .../auth/AssumedRoleCredentialProvider.java | 2 +- ...ava => CredentialProviderListFactory.java} | 42 ++-- .../auth/delegation/SessionTokenBinding.java | 4 +- .../hadoop/fs/s3a/impl/AWSClientConfig.java | 2 +- .../fs/s3a/impl/InstantiationIOException.java | 66 +++++- .../tools/hadoop-aws/aws_sdk_upgrade.md | 35 +-- .../fs/s3a/TestS3AAWSCredentialsProvider.java | 147 +++--------- .../adapter/TestV1CredentialsProvider.java | 222 ++++++++++++++++++ .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 2 +- .../delegation/CountInvocationsProvider.java | 4 +- 17 files changed, 414 insertions(+), 225 deletions(-) rename hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/{AwsCredentialListProvider.java => CredentialProviderListFactory.java} (88%) create mode 100644 hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/adapter/TestV1CredentialsProvider.java diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index cd9bd6e875e89..168cf4222d741 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -517,7 +517,7 @@ software.amazon.awssdk.crt aws-crt - compile + test org.assertj diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java index 26700e428fd7f..43a8fe0af24b2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/AWSCredentialProviderList.java @@ -218,8 +218,7 @@ public AwsCredentials resolveCredentials() { * * @return providers */ - @VisibleForTesting - List getProviders() { + public List getProviders() { return providers; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 9fe3163dbf73a..2d50f587bb252 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -230,7 +230,7 @@ import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.Statistic.*; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.INITIALIZE_SPAN; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderList; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.createAWSCredentialProviderList; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.STATEMENT_ALLOW_SSE_KMS_RW; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.allowS3Operations; import static org.apache.hadoop.fs.s3a.auth.delegation.S3ADelegationTokens.TokenIssuingPolicy.NoTokensAvailable; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 015f3fb5cf5e0..87ae6e3915d22 100644 --- 
a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -563,12 +563,20 @@ public static long dateToLong(final Date date) { * @return instance of the specified class * @throws IOException on any problem */ - public static InstanceT getInstanceFromReflection(Class instanceClass, - Configuration conf, @Nullable URI uri, Class interfaceImplemented, String methodName, + public static InstanceT getInstanceFromReflection( + Class instanceClass, + Configuration conf, + @Nullable URI uri, + Class interfaceImplemented, + String methodName, String configKey) throws IOException { - String className = instanceClass.getName(); - return getInstanceFromReflection(className,conf, uri ,interfaceImplemented, methodName,configKey); + return getInstanceFromReflection(instanceClass.getName(), + conf, + uri, + interfaceImplemented, + methodName, + configKey); } /** @@ -653,11 +661,11 @@ public static InstanceT getInstanceFromReflection(String className, throw translateException("Instantiate " + className, "", (SdkException) targetException); } else { // supported constructor or factory method found, but the call failed - throw instantiationException(className , configKey, targetException); + throw instantiationException(className, configKey, targetException); } } catch (ReflectiveOperationException | IllegalArgumentException e) { // supported constructor or factory method found, but the call failed - throw instantiationException(className , configKey, e); + throw instantiationException(className, configKey, e); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java index 44ddcda522040..d7123787735a1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Tristate.java @@ -40,7 +40,7 @@ public enum Tristate { @SuppressWarnings("NonSerializableFieldInSerializableClass") private final Optional mapping; - Tristate(final Optional t) { + Tristate(final Optional t) { mapping = t; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java index 5916825f03e52..4bdf419fa73b9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.net.URI; -import java.util.Optional; +import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; import org.slf4j.Logger; @@ -28,67 +28,61 @@ import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.s3a.Tristate; +import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; + +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.unavailable; /** * Binding support; the sole way which the rest of the code should instantiate v1 SDK libraries. * Uses this class's Classloader for its analysis/loading. 
*/ @SuppressWarnings("StaticNonFinalField") -public class AwsV1BindingSupport { +public final class AwsV1BindingSupport { private static final Logger LOG = LoggerFactory.getLogger( AwsV1BindingSupport.class); + /** + * V1 credential provider classname: {@code}. + */ public static final String CREDENTIAL_PROVIDER_CLASSNAME = "com.amazonaws.auth.AWSCredentialsProvider"; - public static final String NOT_AWS_PROVIDER = - "does not implement AWSCredentialsProvider"; - - public static final String NOT_AWS_V2_PROVIDER = - "does not implement AwsCredentialsProvider"; - - public static final String ABSTRACT_PROVIDER = - "is abstract and therefore cannot be created"; - /** - * Tack availability. + * SDK availability. */ - private static Tristate sdkAvailability = Tristate.UNKNOWN; - - @SuppressWarnings("FieldAccessedSynchronizedAndUnsynchronized") - private static Class credentialProviderClass; + private static final AtomicBoolean sdkAvailability = new AtomicBoolean(checkForAwsV1Sdk()); - static { - isAwsV1SdkAvailable(); + private AwsV1BindingSupport() { } /** - * Is the AWS v1 SDK available - * @param cl classloader to look in. + * Probe for the AWS v1 SDK being available by looking for + * the class {@link #CREDENTIAL_PROVIDER_CLASSNAME}. * @return true if it was found in the classloader */ - public static synchronized boolean isAwsV1SdkAvailable() { + private static boolean checkForAwsV1Sdk() { - final Optional mapping = sdkAvailability.getMapping(); - if (mapping.isPresent()) { - return mapping.get(); - } - // no binding, so calculate it once. try { ClassLoader cl = AwsV1BindingSupport.class.getClassLoader(); - credentialProviderClass = cl.loadClass(CREDENTIAL_PROVIDER_CLASSNAME); + cl.loadClass(CREDENTIAL_PROVIDER_CLASSNAME); LOG.debug("v1 SDK class {} found", CREDENTIAL_PROVIDER_CLASSNAME); - sdkAvailability = Tristate.TRUE; + return true; } catch (Exception e) { LOG.debug("v1 SDK class {} not found", CREDENTIAL_PROVIDER_CLASSNAME, e); - sdkAvailability = Tristate.FALSE; + return false; } - // guaranteed to be non-empty - return sdkAvailability.getMapping().get(); } + /** + * Is the AWS v1 SDK available? + * @return true if it was found in the classloader + */ + public static synchronized boolean isAwsV1SdkAvailable() { + return sdkAvailability.get(); + } + + /** * Create an AWS credential provider from its class by using reflection. The * class must implement one of the following means of construction, which are @@ -107,17 +101,19 @@ public static synchronized boolean isAwsV1SdkAvailable() { * @param conf configuration * @param className credential classname * @param uri URI of the FS + * @param key configuration key to use * @return the instantiated class - * @throws IOException on any instantiation failure, including v1 SDK not found. + * @throws InstantiationIOException on any instantiation failure, including v1 SDK not found + * @throws IOException anything else. 
*/ - public static AwsCredentialsProvider createAWSV1CredentialProvider( + public static AwsCredentialsProvider createAWSV1CredentialProvider( Configuration conf, String className, - @Nullable URI uri) throws IOException { + @Nullable URI uri, + final String key) throws IOException { if (!isAwsV1SdkAvailable()) { - throw new IOException("No AWS v1 SDK available; unable to load " + className); + throw unavailable(className, key, "No AWS v1 SDK available"); } return V1ToV2AwsCredentialProviderAdapter.create(conf, className, uri); - } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java index bcf0f80d8ae79..2cbd9836d3a43 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/V1ToV2AwsCredentialProviderAdapter.java @@ -37,7 +37,6 @@ import software.amazon.awssdk.auth.credentials.AwsSessionCredentials; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.CredentialInitializationException; import org.apache.hadoop.fs.s3a.S3AUtils; import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; @@ -50,6 +49,7 @@ */ public final class V1ToV2AwsCredentialProviderAdapter implements AwsCredentialsProvider, Closeable { + private static final Logger LOG = LoggerFactory.getLogger( V1ToV2AwsCredentialProviderAdapter.class); @@ -91,7 +91,7 @@ public void close() throws IOException { if (v1CredentialsProvider instanceof Closeable) { ((Closeable) v1CredentialsProvider).close(); } else if (v1CredentialsProvider instanceof AutoCloseable) { - S3AUtils.closeAutocloseables(LOG, (AutoCloseable)v1CredentialsProvider); + S3AUtils.closeAutocloseables(LOG, (AutoCloseable) v1CredentialsProvider); } } @@ -148,7 +148,7 @@ static AwsCredentialsProvider create(AWSCredentialsProvider v1CredentialsProvide * @param uri URI of the FS * @return the instantiated class * @throws InstantiationIOException on construction and instantiation failures, - * including v1 SDK exceptions. + * including v1 SDK exceptions. * @throws IOException if raised by a constructor/factory method. 
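+   * @see S3AUtils#getInstanceFromReflection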
*/ static AwsCredentialsProvider create( @@ -156,10 +156,10 @@ static AwsCredentialsProvider create( String className, @Nullable URI uri) throws InstantiationIOException, IOException { - final AWSCredentialsProvider instance = - S3AUtils.getInstanceFromReflection(className, conf, uri, AWSCredentialsProvider.class, - "getInstance", AWS_CREDENTIALS_PROVIDER); - return create(instance); + final AWSCredentialsProvider instance = + S3AUtils.getInstanceFromReflection(className, conf, uri, AWSCredentialsProvider.class, + "getInstance", AWS_CREDENTIALS_PROVIDER); + return create(instance); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index 2513c2fdb168b..0e3f2953710d7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -54,7 +54,7 @@ import org.apache.hadoop.security.UserGroupInformation; import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.buildAWSProviderList; /** * Support IAM Assumed roles by instantiating an instance of diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java similarity index 88% rename from hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java rename to hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java index 2a644fd385ef1..761a8258ad44e 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AwsCredentialListProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java @@ -38,7 +38,6 @@ import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider; import org.apache.hadoop.fs.s3a.Constants; @@ -54,11 +53,12 @@ import static org.apache.hadoop.fs.s3a.adapter.AwsV1BindingSupport.isAwsV1SdkAvailable; /** - * This class provides methods to create the list of AWS credential providers. + * This class provides methods to create a {@link AWSCredentialProviderList} + * list of AWS credential providers. */ -public final class AwsCredentialListProvider { +public final class CredentialProviderListFactory { - private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class); + private static final Logger LOG = LoggerFactory.getLogger(CredentialProviderListFactory.class); /** * A v1 entry has been remapped. warn once about this and then shut up. @@ -84,29 +84,38 @@ public final class AwsCredentialListProvider { SimpleAWSCredentialsProvider.class, TemporaryAWSCredentialsProvider.class)); + /** V1 credential provider: {@value}. */ public static final String ANONYMOUS_CREDENTIALS_V1 = "com.amazonaws.auth.AnonymousAWSCredentials"; + /** V1 credential provider: {@value}. 
 */
  public static final String EC2_CONTAINER_CREDENTIALS_V1 =
      "com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper";

+  /** V1 credential provider: {@value}. */
  public static final String EC2_IAM_CREDENTIALS_V1 =
      "com.amazonaws.auth.InstanceProfileCredentialsProvider";

+  /** V2 credential provider: {@link IAMInstanceCredentialsProvider}. */
  public static final String EC2_IAM_CREDENTIALS_V2 =
      IAMInstanceCredentialsProvider.class.getName();

+  /** V1 credential provider: {@value}. */
  public static final String ENVIRONMENT_CREDENTIALS_V1 =
      "com.amazonaws.auth.EnvironmentVariableCredentialsProvider";

+  /** V2 credential provider: {@link EnvironmentVariableCredentialsProvider}. */
  public static final String ENVIRONMENT_CREDENTIALS_V2 =
      EnvironmentVariableCredentialsProvider.class.getName();

  /**
-   * Private map of v1 to v2 credential provider name mapping
+   * Private map of v1 to v2 credential provider name mapping.
   */
-  private static final Map<String, String> V1_V2_CREDENTIAL_PROVIDER_MAP = initCredentialProvidersMap();
+  private static final Map<String, String> V1_V2_CREDENTIAL_PROVIDER_MAP =
+      initCredentialProvidersMap();

+  private CredentialProviderListFactory() {
+  }

  /**
   * Create the AWS credentials from the providers, the URI and
@@ -177,7 +186,7 @@ private static Map<String, String> initCredentialProvidersMap() {
   * support a forbidden list to prevent loops, mandate full secrets, etc.
   * @param binding Binding URI - may be null
   * @param conf configuration
-   * @param key key
+   * @param key configuration key to use
   * @param forbidden a possibly empty set of forbidden classes.
   * @param defaultValues list of default providers.
   * @return the list of classes, possibly empty
@@ -215,13 +224,13 @@ public static AWSCredentialProviderList buildAWSProviderList(
      // now scan the forbidden list. doing this after any mappings ensures the v1 names
      // are also blocked
      if (forbiddenClassnames.contains(className)) {
-        throw new PathIOException(String.valueOf(binding),
-            E_FORBIDDEN_AWS_PROVIDER + " in option " + key + ": " + className);
+        throw new InstantiationIOException(InstantiationIOException.Kind.Forbidden,
+            className, key, E_FORBIDDEN_AWS_PROVIDER, null);
      }

      AwsCredentialsProvider provider;
      try {
-        provider = createAWSV2CredentialProvider(conf, className, binding);
+        provider = createAWSV2CredentialProvider(conf, className, binding, key);
      } catch (InstantiationIOException e) {
        // failed to create a v2; try to see if it is a v1
        if (e.getKind() == InstantiationIOException.Kind.IsNotImplementation) {
@@ -230,10 +239,10 @@ public static AWSCredentialProviderList buildAWSProviderList(
            LOG.debug("Failed to create {} as v2 credentials, trying to instantiate as v1",
                className);
            try {
-              provider =
-                  AwsV1BindingSupport.createAWSV1CredentialProvider(conf, className, binding);
-              LOG_REMAPPED_ENTRY.warn("Credentials option {} contains AWS v1 SDK entry {}",
-                  key, className);
+              provider =
+                  AwsV1BindingSupport.createAWSV1CredentialProvider(conf, className, binding, key);
+              LOG_REMAPPED_ENTRY.warn("Credentials option {} contains AWS v1 SDK entry {}",
+                  key, className);
            } catch (InstantiationIOException ex) {
              // if it is something other than non-implementation, throw.
              // that way, non-impl messages are about v2 not v1 in the error
@@ -267,16 +276,17 @@ public static AWSCredentialProviderList buildAWSProviderList(
   * @param conf configuration
   * @param className credential class name
   * @param uri URI of the FS
+   * @param key configuration key to use
   * @return the instantiated class
   * @throws IOException on any instantiation failure.
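+   * @throws InstantiationIOException of kind {@code IsNotImplementation} if the
+   * class is not an SDK v2 credential provider; the caller uses this to decide
+   * whether to retry the classname as a v1 provider.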
* @see S3AUtils#getInstanceFromReflection */ private static AwsCredentialsProvider createAWSV2CredentialProvider(Configuration conf, String className, - @Nullable URI uri) throws IOException { + @Nullable URI uri, final String key) throws IOException { LOG.debug("Credential provider class is {}", className); return S3AUtils.getInstanceFromReflection(className, conf, uri, AwsCredentialsProvider.class, - "create", AWS_CREDENTIALS_PROVIDER); + "create", key); } } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 9a14894d05a27..5b26148f0526b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -47,8 +47,8 @@ import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; import static org.apache.hadoop.fs.s3a.Invoker.once; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.STANDARD_AWS_PROVIDERS; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.buildAWSProviderList; import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromAWSCredentials; import static org.apache.hadoop.fs.s3a.auth.MarshalledCredentialBinding.fromSTSCredentials; import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.*; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java index a69907755eeac..0faa30efc68ca 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSClientConfig.java @@ -348,7 +348,7 @@ private static void initSigner(Configuration conf, if (configKey != null) { String signerOverride = conf.getTrimmed(configKey, ""); if (!signerOverride.isEmpty()) { - LOG.debug("Signer override for {}} = {}", awsServiceIdentifier, signerOverride); + LOG.debug("Signer override for {} = {}", awsServiceIdentifier, signerOverride); clientConfig.putAdvancedOption(SdkAdvancedClientOption.SIGNER, SignerFactory.createSigner(signerOverride, configKey)); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java index 94e8be7100f9d..6faffb7eff519 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java @@ -54,6 +54,18 @@ public class InstantiationIOException extends IOException { */ private final String key; + /** + * An (extensible) enum of kinds of instantiation failure. 
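+   * <p>
+   * Each kind is set by the matching factory method below, so callers and tests
+   * can switch on {@code getKind()} rather than parse error text;
+   * {@code CredentialProviderListFactory} does this to decide when a failed
+   * v2 instantiation should be retried as a v1 provider.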
+   */
+  public enum Kind {
+    Forbidden,
+    InstantiationFailure,
+    IsAbstract,
+    IsNotImplementation,
+    Other,
+    Unavailable,
+    UnsupportedConstructor,
+  }

  public InstantiationIOException(
      final Kind kind,
@@ -81,10 +93,23 @@ public String getKey() {
    return key;
  }

+  /**
+   * Class is abstract.
+   * @param classname classname.
+   * @param key configuration key
+   * @return an exception.
+   */
  public static InstantiationIOException isAbstract(String classname, String key) {
    return new InstantiationIOException(Kind.IsAbstract, classname, key, ABSTRACT_PROVIDER, null);
  }

+  /**
+   * Class does not implement the desired interface.
+   * @param classname classname.
+   * @param interfaceName required interface
+   * @param key configuration key
+   * @return an exception.
+   */
  public static InstantiationIOException isNotInstanceOf(String classname,
      String interfaceName,
      String key) {
@@ -92,13 +117,39 @@ public static InstantiationIOException isNotInstanceOf(String classname,
        key, DOES_NOT_IMPLEMENT + " " + interfaceName, null);
  }

+  /**
+   * Class is unavailable for some reason, likely a missing dependency.
+   * @param classname classname.
+   * @param key configuration key
+   * @param text error text
+   * @return an exception.
+   */
+  public static InstantiationIOException unavailable(String classname,
+      String key,
+      String text) {
+    return new InstantiationIOException(Kind.Unavailable,
+        classname, key, text, null);
+  }

-  public static InstantiationIOException unsupportedConstructor(String classname, String key) {
+  /**
+   * Failure to find a valid constructor (signature, visibility) or
+   * factory method.
+   * @param classname classname.
+   * @param key configuration key
+   * @return an exception.
+   */
+  public static InstantiationIOException unsupportedConstructor(String classname,
+      String key) {
    return new InstantiationIOException(Kind.UnsupportedConstructor,
        classname, key, CONSTRUCTOR_EXCEPTION, null);
  }

-
+  /**
+   * General instantiation failure.
+   * @param classname classname.
+   * @param key configuration key
+   * @param t thrown
+   * @return an exception.
+   */
  public static InstantiationIOException instantiationException(String classname,
      String key,
      Throwable t) {
@@ -106,15 +157,4 @@ public static InstantiationIOException instantiationException(String classname,
        classname, key, INSTANTIATION_EXCEPTION + " " + t, t);
  }

-  /**
-   * An (extensible) enum of kinds.
-   */
-  public enum Kind {
-    IsAbstract,
-    UnsupportedConstructor,
-    IsNotImplementation,
-    InstantiationFailure,
-    Other,
-  }
-
}
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md
index a08e6a9eb859d..739090b47c150 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md
@@ -45,11 +45,11 @@ A complete list of the changes can be found in the
 As the module name is lost, in hadoop releases a large JAR file with the name "bundle"
 is now part of the distribution.
-This is the AWS v2 SDK shaded artifact.
+This is the AWS v2 SDK shaded artifact.

 The new and old SDKs can co-exist; the only place that the hadoop code
 may still use the original SDK is when a non-standard V1 AWS credential
-provider is declared.
+provider is declared.

 Any deployment of the S3A connector must include this JAR or
 the subset of non-shaded aws- JARs needed for communication
@@ -78,7 +78,7 @@ As a result, standard cluster configurations should seamlessly upgrade.
| `com.amazonaws.auth.InstanceProfileCredentialsProvider` | `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` |

-There are a limited number of troublespots here
+There are still a number of trouble spots here:

 #### Other `com.amazonaws.auth.` AWS providers

@@ -114,19 +114,20 @@ The major changes and how this affects S3A are listed below.

 * Package names have changed, all classes in SDK V2 are under `software.amazon.awssdk`,
 SDK V1 classes were under `com.amazonaws`.
-* There is no interoperability between them.
+* There is no interoperability between the old and new classes.
 * All classnames are different, often in very subtle ways.
 It is possible to use both in the same class, as is done in the package
 `org.apache.hadoop.fs.s3a.adapter`.
 * All the core message classes are now automatically generated from a JSON protocol description.
-* These all getter methods have been renamed.
+* All getter methods have been renamed.
 * All classes are constructed via builder methods
 * Message classes are no longer Java `Serializable`.

-Most of these changes simply add what will feel to be gratuitous migration effort;
-the removable of the `Serializable` nature from all messaage response classes can
+Most of these changes simply create what will feel to be gratuitous migration effort;
+the removal of the `Serializable` nature from all message response classes can
 potentially break applications
-such as anything passing them between Spark workers.
 See AWS SDK v2 issue [Simplify Modeled Message Marshalling #82](https://github.com/aws/aws-sdk-java-v2/issues/82),
-note that it was filed in 2017, then implement your own workaround.
+note that it was filed in 2017, then implement your own workaround pending that issue
+being resolved.

 ### Credential Providers

@@ -139,7 +140,7 @@ The change in interface will mean that custom credential providers will need to
 implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` instead of
 `com.amazonaws.auth.AWSCredentialsProvider`.

-#### v1 `AWSCredentialsProvider` interface
+#### Original v1 `AWSCredentialsProvider` interface

 Note how the interface begins with the capitalized "AWS" acronym.
 The v2 interface starts with "Aws". This is a very subtle change
@@ -159,6 +160,9 @@ public interface AWSCredentialsProvider {
 The interface binding also supported a factory method,
 `AWSCredentialsProvider instance()` which, if available, would be invoked in
 preference to using any constructor.
+If the interface implemented `Closeable` or `AutoCloseable`, these would
+be invoked when the provider chain was being shut down.
+
 #### v2 `AwsCredentialsProvider` interface

 Note how the interface name now begins with "Aws", not the all-caps "AWS" acronym.
@@ -178,15 +182,16 @@ public interface AwsCredentialsProvider {
 2. `getCredentials()` has become `resolveCredentials()`.
 3. There is now the expectation in the SDK that credential resolution/lookup etc
 will be performed in `resolveCredentials()`.
-
+4. If the interface implements `Closeable` or `AutoCloseable`, these will
+   be invoked when the provider chain is being shut down.

 ### Delegation Tokens

-1. Custom credential providers used in delegation token binding classes will need to be updated;
-2. The return type from delegation token binding has changed to support more class
-   instances being returned in future.
-
+1. Custom credential providers used in delegation token binding classes will need to be updated
+2.
The return type from delegation token binding has changed to support more class + instances being returned in the future. + `AWSCredentialProviderList` has been upgraded to the V2 API. * It still retains a `refresh()` method but this is now a deprecated no-op. * It is still `Closeable`; its `close()` method iterates through all entries in @@ -211,6 +216,8 @@ has been replaced by [software.amazon.awssdk.core.signer.Signer](https://github. The change in signers will mean the custom signers will need to be updated to implement the new interface. +There is no support to assist in this migration. + ### S3A Auditing Extensions. The callbacks from the SDK have all changed, as has diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java index ceb8f188ab91d..4b06d596a5661 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3AAWSCredentialsProvider.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -33,9 +33,8 @@ import java.util.stream.Collectors; import javax.annotation.Nullable; -import com.amazonaws.auth.AWSCredentials; -import com.amazonaws.auth.AWSCredentialsProvider; import org.assertj.core.api.Assertions; +import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import software.amazon.awssdk.auth.credentials.AwsBasicCredentials; @@ -43,33 +42,33 @@ import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider; import software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider; -import org.junit.Test; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.s3a.adapter.V1ToV2AwsCredentialProviderAdapter; import org.apache.hadoop.fs.s3a.auth.AbstractSessionCredentialsProvider; import org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider; -import org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider; import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider; import org.apache.hadoop.fs.s3a.auth.NoAuthWithAWSException; import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.io.retry.RetryPolicy; import org.apache.hadoop.util.Sets; -import static org.apache.hadoop.fs.s3a.Constants.*; -import static org.apache.hadoop.fs.s3a.S3ATestConstants.*; -import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.STANDARD_AWS_PROVIDERS; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ANONYMOUS_CREDENTIALS_V1; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.EC2_CONTAINER_CREDENTIALS_V1; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.ENVIRONMENT_CREDENTIALS_V1; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.buildAWSProviderList; -import static 
org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.createAWSCredentialProviderList; +import static org.apache.hadoop.fs.s3a.Constants.ASSUMED_ROLE_CREDENTIALS_PROVIDER; +import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER; +import static org.apache.hadoop.fs.s3a.S3ATestConstants.DEFAULT_CSVTEST_FILE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.authenticationContains; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.buildClassListString; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.getCSVTestPath; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.STANDARD_AWS_PROVIDERS; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.buildAWSProviderList; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.createAWSCredentialProviderList; import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.DOES_NOT_IMPLEMENT; import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.apache.hadoop.test.LambdaTestUtils.interceptFuture; -import static org.junit.Assert.*; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; /** * Unit tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic. @@ -82,12 +81,13 @@ public class TestS3AAWSCredentialsProvider { private static final URI TESTFILE_URI = new Path( DEFAULT_CSVTEST_FILE).toUri(); - private static final Logger LOG = LoggerFactory.getLogger(AwsCredentialListProvider.class); + private static final Logger LOG = LoggerFactory.getLogger(TestS3AAWSCredentialsProvider.class); @Test public void testProviderWrongClass() throws Exception { expectProviderInstantiationFailure(this.getClass(), - DOES_NOT_IMPLEMENT + " software.amazon.awssdk.auth.credentials.AwsCredentialsProvider");} + DOES_NOT_IMPLEMENT + " software.amazon.awssdk.auth.credentials.AwsCredentialsProvider"); + } @Test public void testProviderAbstractClass() throws Exception { @@ -158,42 +158,6 @@ public void testDefaultChainNoURI() throws Exception { createAWSCredentialProviderList(null, conf)); } - @Test - public void testV1V2Mapping() throws Exception { - URI uri1 = new URI("s3a://bucket1"); - - List> expectedClasses = - Arrays.asList( - IAMInstanceCredentialsProvider.class, - AnonymousAWSCredentialsProvider.class, - EnvironmentVariableCredentialsProvider.class); - Configuration conf = - createProviderConfiguration(buildClassList( - EC2_CONTAINER_CREDENTIALS_V1, - ANONYMOUS_CREDENTIALS_V1, - ENVIRONMENT_CREDENTIALS_V1)); - AWSCredentialProviderList list1 = createAWSCredentialProviderList( - uri1, conf); - assertCredentialProviders(expectedClasses, list1); - } - - @Test - public void testV1Wrapping() throws Exception { - URI uri1 = new URI("s3a://bucket1"); - - List> expectedClasses = - Arrays.asList( - V1ToV2AwsCredentialProviderAdapter.class, - V1ToV2AwsCredentialProviderAdapter.class); - Configuration conf = - createProviderConfiguration(buildClassList( - LegacyV1CredentialProvider.class.getName(), - LegacyV1CredentialProviderWithConf.class.getName())); - AWSCredentialProviderList list1 = createAWSCredentialProviderList( - uri1, conf); - assertCredentialProviders(expectedClasses, list1); - } - @Test public void testConfiguredChain() throws Exception { URI uri1 = new URI("s3a://bucket1"), uri2 = new URI("s3a://bucket2"); @@ -202,7 +166,7 @@ public void testConfiguredChain() throws Exception { IAMInstanceCredentialsProvider.class, 
AnonymousAWSCredentialsProvider.class, EnvironmentVariableCredentialsProvider.class - ); + ); Configuration conf = createProviderConfiguration(buildClassListString(expectedClasses)); AWSCredentialProviderList list1 = createAWSCredentialProviderList( @@ -263,8 +227,6 @@ public static void assertCredentialResolution(AwsCredentials creds, String key, Assertions.assertThat(creds.secretAccessKey()) .describedAs("secret key of %s", creds) .isEqualTo(secret); - - } private String buildClassList(Class... classes) { @@ -272,6 +234,7 @@ private String buildClassList(Class... classes) { .map(Class::getCanonicalName) .collect(Collectors.joining(",")); } + private String buildClassList(String... classes) { return Arrays.stream(classes) .collect(Collectors.joining(",")); @@ -303,7 +266,7 @@ public ConstructorSignatureErrorProvider(String str) { * A credential provider whose constructor raises an NPE. */ protected static class ConstructorFailureProvider - extends AbstractProvider { + extends AbstractProvider { @SuppressWarnings("unused") public ConstructorFailureProvider() { @@ -322,7 +285,7 @@ public void testAWSExceptionTranslation() throws Throwable { } } - protected static class AWSExceptionRaisingFactory extends AbstractProvider { + protected static class AWSExceptionRaisingFactory extends AbstractProvider { public static final String NO_AUTH = "No auth"; @@ -338,7 +301,7 @@ public void testFactoryWrongType() throws Throwable { InstantiationIOException.CONSTRUCTOR_EXCEPTION); } - static class FactoryOfWrongType extends AbstractProvider { + static class FactoryOfWrongType extends AbstractProvider { public static final String NO_AUTH = "No auth"; @@ -524,7 +487,7 @@ public void testIOEInConstructorPropagation() throws Throwable { /** * Credential provider which raises an IOE when constructed. */ - protected static class IOERaisingProvider extends AbstractProvider { + protected static class IOERaisingProvider extends AbstractProvider { public IOERaisingProvider(URI uri, Configuration conf) throws IOException { @@ -588,9 +551,9 @@ public void testConcurrentAuthentication() throws Throwable { for (Future result : results) { AwsCredentials credentials = result.get(); assertEquals("Access key from credential provider", - "expectedAccessKey", credentials.accessKeyId()); + "expectedAccessKey", credentials.accessKeyId()); assertEquals("Secret key from credential provider", - "expectedSecret", credentials.secretAccessKey()); + "expectedSecret", credentials.secretAccessKey()); } } finally { pool.awaitTermination(10, TimeUnit.SECONDS); @@ -599,7 +562,7 @@ public void testConcurrentAuthentication() throws Throwable { assertTrue( "Provider initialized without errors. isInitialized should be true", - provider.isInitialized()); + provider.isInitialized()); assertTrue( "Provider initialized without errors. hasCredentials should be true", provider.hasCredentials()); @@ -674,62 +637,6 @@ public void testConcurrentAuthenticationError() throws Throwable { provider.getInitializationException().getMessage().contains("expected error")); } - public static class LegacyV1CredentialProvider implements AWSCredentialsProvider { - - public LegacyV1CredentialProvider() { - } - - @Override - public AWSCredentials getCredentials() { - return null; - } - - @Override - public void refresh() { - - } - } - - /** - * V1 credentials with a configuration constructor. 
- */ - public static final class LegacyV1CredentialProviderWithConf - extends LegacyV1CredentialProvider { - - public LegacyV1CredentialProviderWithConf(Configuration conf) { - } - } - - /** - * V1 Credentials whose factory method raises ClassNotFoundException. - * Expect this to fail rather than trigger recursive recovery; - * exception will be wrapped with something intended to be informative. - */ - @Test - public void testV1InstantiationFailurePropagation() throws Throwable { - InstantiationIOException expected = intercept(InstantiationIOException.class, - "simulated CNFE", - () -> createAWSCredentialProviderList( - TESTFILE_URI, - createProviderConfiguration(V1CredentialProviderDoesNotInstantiate.class.getName()))); - // print for the curious - LOG.info("{}", expected.toString()); - } - - - /** - * V1 credentials which raises an instantiation exception. - */ - public static final class V1CredentialProviderDoesNotInstantiate - extends LegacyV1CredentialProvider { - - private V1CredentialProviderDoesNotInstantiate() { - } - - public static AWSCredentialsProvider getInstance() throws ClassNotFoundException { - throw new ClassNotFoundException("simulated CNFE"); - } - } /** * V2 Credentials whose factory method raises ClassNotFoundException. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/adapter/TestV1CredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/adapter/TestV1CredentialsProvider.java new file mode 100644 index 0000000000000..b0e1b57d75471 --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/adapter/TestV1CredentialsProvider.java @@ -0,0 +1,222 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.hadoop.fs.s3a.adapter;
+
+import java.io.IOException;
+import java.net.URI;
+import java.util.Arrays;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import com.amazonaws.auth.AWSCredentials;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import org.assertj.core.api.Assertions;
+import org.junit.Test;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3a.AWSCredentialProviderList;
+import org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider;
+import org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider;
+import org.apache.hadoop.fs.s3a.impl.InstantiationIOException;
+
+import static org.apache.hadoop.fs.s3a.Constants.AWS_CREDENTIALS_PROVIDER;
+import static org.apache.hadoop.fs.s3a.S3ATestConstants.DEFAULT_CSVTEST_FILE;
+import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.ANONYMOUS_CREDENTIALS_V1;
+import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.EC2_CONTAINER_CREDENTIALS_V1;
+import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.ENVIRONMENT_CREDENTIALS_V1;
+import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.createAWSCredentialProviderList;
+import static org.apache.hadoop.test.LambdaTestUtils.intercept;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+
+/**
+ * Unit tests for v1 to v2 credential provider logic.
+ */
+public class TestV1CredentialsProvider {
+
+  /**
+   * URI of the landsat images.
+   */
+  private static final URI TESTFILE_URI = new Path(
+      DEFAULT_CSVTEST_FILE).toUri();
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestV1CredentialsProvider.class);
+
+
+  @Test
+  public void testV1V2Mapping() throws Exception {
+    URI uri1 = new URI("s3a://bucket1");
+
+    List<Class<?>> expectedClasses =
+        Arrays.asList(
+            IAMInstanceCredentialsProvider.class,
+            AnonymousAWSCredentialsProvider.class,
+            EnvironmentVariableCredentialsProvider.class);
+    Configuration conf =
+        createProviderConfiguration(buildClassList(
+            EC2_CONTAINER_CREDENTIALS_V1,
+            ANONYMOUS_CREDENTIALS_V1,
+            ENVIRONMENT_CREDENTIALS_V1));
+    AWSCredentialProviderList list1 = createAWSCredentialProviderList(
+        uri1, conf);
+    assertCredentialProviders(expectedClasses, list1);
+  }
+
+  @Test
+  public void testV1Wrapping() throws Exception {
+    URI uri1 = new URI("s3a://bucket1");
+
+    List<Class<?>> expectedClasses =
+        Arrays.asList(
+            V1ToV2AwsCredentialProviderAdapter.class,
+            V1ToV2AwsCredentialProviderAdapter.class);
+    Configuration conf =
+        createProviderConfiguration(buildClassList(
+            LegacyV1CredentialProvider.class.getName(),
+            LegacyV1CredentialProviderWithConf.class.getName()));
+    AWSCredentialProviderList list1 = createAWSCredentialProviderList(
+        uri1, conf);
+    assertCredentialProviders(expectedClasses, list1);
+  }
+
+  private String buildClassList(String... classes) {
+    return Arrays.stream(classes)
+        .collect(Collectors.joining(","));
+  }
+
+
+  /**
+   * Expect a provider to raise an exception on failure.
+   * @param option aws provider option string.
+   * @param expectedErrorText error text to expect
+   * @return the exception raised
+   * @throws Exception any unexpected exception thrown.
+   */
+  private IOException expectProviderInstantiationFailure(String option,
+      String expectedErrorText) throws Exception {
+    return intercept(IOException.class, expectedErrorText,
+        () -> createAWSCredentialProviderList(
+            TESTFILE_URI,
+            createProviderConfiguration(option)));
+  }
+
+  /**
+   * Create a configuration with a specific provider.
+   * @param providerOption option for the aws credential provider option.
+   * @return a configuration to use in test cases
+   */
+  private Configuration createProviderConfiguration(
+      final String providerOption) {
+    Configuration conf = new Configuration(false);
+    conf.set(AWS_CREDENTIALS_PROVIDER, providerOption);
+    return conf;
+  }
+
+  /**
+   * Asserts expected provider classes in list.
+   * @param expectedClasses expected provider classes
+   * @param list providers to check
+   */
+  private static void assertCredentialProviders(
+      List<Class<?>> expectedClasses,
+      AWSCredentialProviderList list) {
+    assertNotNull(list);
+    List<AwsCredentialsProvider> providers = list.getProviders();
+    Assertions.assertThat(providers)
+        .describedAs("providers")
+        .hasSize(expectedClasses.size());
+    for (int i = 0; i < expectedClasses.size(); ++i) {
+      Class<?> expectedClass =
+          expectedClasses.get(i);
+      AwsCredentialsProvider provider = providers.get(i);
+      assertNotNull(
+          String.format("At position %d, expected class is %s, but found null.",
+              i, expectedClass), provider);
+      assertTrue(
+          String.format("At position %d, expected class is %s, but found %s.",
+              i, expectedClass, provider.getClass()),
+          expectedClass.isAssignableFrom(provider.getClass()));
+    }
+  }
+
+
+  public static class LegacyV1CredentialProvider implements AWSCredentialsProvider {
+
+    public LegacyV1CredentialProvider() {
+    }
+
+    @Override
+    public AWSCredentials getCredentials() {
+      return null;
+    }
+
+    @Override
+    public void refresh() {
+
+    }
+  }
+
+  /**
+   * V1 credentials with a configuration constructor.
+   */
+  public static final class LegacyV1CredentialProviderWithConf
+      extends LegacyV1CredentialProvider {
+
+    public LegacyV1CredentialProviderWithConf(Configuration conf) {
+    }
+  }
+
+  /**
+   * V1 Credentials whose factory method raises ClassNotFoundException.
+   * Expect this to fail rather than trigger recursive recovery;
+   * exception will be wrapped with something intended to be informative.
+   */
+  @Test
+  public void testV1InstantiationFailurePropagation() throws Throwable {
+    InstantiationIOException expected = intercept(InstantiationIOException.class,
+        "simulated CNFE",
+        () -> createAWSCredentialProviderList(
+            TESTFILE_URI,
+            createProviderConfiguration(V1CredentialProviderDoesNotInstantiate.class.getName())));
+    // print for the curious
+    LOG.info("{}", expected.toString());
+  }
+
+
+  /**
+   * V1 credential provider which raises an instantiation exception.
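+   * The constructor is private, so reflection must go through the static
+   * {@code getInstance()} factory method; its simulated ClassNotFoundException
+   * has to surface, wrapped, in the raised {@code InstantiationIOException}.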
+ */ + public static final class V1CredentialProviderDoesNotInstantiate + extends LegacyV1CredentialProvider { + + private V1CredentialProviderDoesNotInstantiate() { + } + + public static AWSCredentialsProvider getInstance() throws ClassNotFoundException { + throw new ClassNotFoundException("simulated CNFE"); + } + } + + +} diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index 5a854b4d4b8bc..d3dfdd5aceaa8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -59,7 +59,7 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.touch; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; -import static org.apache.hadoop.fs.s3a.auth.AwsCredentialListProvider.E_FORBIDDEN_AWS_PROVIDER; +import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.E_FORBIDDEN_AWS_PROVIDER; import static org.apache.hadoop.fs.s3a.auth.RoleTestUtils.*; import static org.apache.hadoop.fs.s3a.auth.RoleModel.*; import static org.apache.hadoop.fs.s3a.auth.RolePolicies.*; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java index f9bc63e716ba2..4c7cd5c667999 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/CountInvocationsProvider.java @@ -25,7 +25,6 @@ import software.amazon.awssdk.auth.credentials.AwsCredentials; import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider; -import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.CredentialInitializationException; /** @@ -40,7 +39,8 @@ public class CountInvocationsProvider public static final String NAME = CountInvocationsProvider.class.getName(); public static final AtomicLong COUNTER = new AtomicLong(0); - private final AtomicLong instanceCounter = new AtomicLong(0); + + private final AtomicLong instanceCounter = new AtomicLong(0); @Override public AwsCredentials resolveCredentials() { From b608465f24ac0bbedbc13acc44754b6027064c23 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 26 Jul 2023 17:21:14 +0100 Subject: [PATCH 04/20] HADOOP-18820. RoleCredentialProvider IDE had warned of use of non-final resolveCredentials() call in superclass, so cut it ...which broke ITestAssumeRole badly ...so reverted, but marked class final +InstantiationIOException is now a subclass of PathIOE; takes URI of filesystem for more details. +fix ITestS3ABucketExistence by clearing endpoint and region bucket settings. 
Change-Id: I37edc23f76d7be95cd055dcd65d0fdb17e6992f2 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 2 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 11 ++-- .../fs/s3a/adapter/AwsV1BindingSupport.java | 7 +-- .../auth/AssumedRoleCredentialProvider.java | 13 +++-- .../auth/CredentialProviderListFactory.java | 3 +- .../fs/s3a/impl/InstantiationIOException.java | 57 ++++++++++++------- .../tools/hadoop-aws/aws_sdk_upgrade.md | 2 +- .../fs/s3a/ITestS3ABucketExistence.java | 6 ++ .../hadoop/fs/s3a/auth/ITestAssumeRole.java | 11 +++- .../hadoop/fs/s3a/auth/RoleTestUtils.java | 3 + 10 files changed, 74 insertions(+), 41 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 2d50f587bb252..2409fe4bed715 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -852,7 +852,7 @@ protected void verifyBucketExists() throws UnknownStoreException, IOException { if(!trackDurationAndSpan( STORE_EXISTS_PROBE, bucket, null, () -> - invoker.retry("doestBucketExist", bucket, true, () -> { + invoker.retry("doesBucketExist", bucket, true, () -> { try { if (BUCKET_REGIONS.containsKey(bucket)) { return true; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index 87ae6e3915d22..eada648a2f75d 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -615,10 +615,10 @@ public static InstanceT getInstanceFromReflection(String className, try { Class instanceClass = S3AUtils.class.getClassLoader().loadClass(className); if (Modifier.isAbstract(instanceClass.getModifiers())) { - throw isAbstract(className, configKey); + throw isAbstract(uri, className, configKey); } if (!interfaceImplemented.isAssignableFrom(instanceClass)) { - throw isNotInstanceOf(className, interfaceImplemented.getName(), configKey); + throw isNotInstanceOf(uri, className, interfaceImplemented.getName(), configKey); } Constructor cons; @@ -649,7 +649,7 @@ public static InstanceT getInstanceFromReflection(String className, } // no supported constructor or factory method found - throw unsupportedConstructor(className, configKey); + throw unsupportedConstructor(uri, className, configKey); } catch (InvocationTargetException e) { Throwable targetException = e.getTargetException(); if (targetException == null) { @@ -661,13 +661,12 @@ public static InstanceT getInstanceFromReflection(String className, throw translateException("Instantiate " + className, "", (SdkException) targetException); } else { // supported constructor or factory method found, but the call failed - throw instantiationException(className, configKey, targetException); + throw instantiationException(uri, className, configKey, targetException); } } catch (ReflectiveOperationException | IllegalArgumentException e) { // supported constructor or factory method found, but the call failed - throw instantiationException(className, configKey, e); + throw instantiationException(uri, className, configKey, e); } - } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java 
b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java index 4bdf419fa73b9..177952cb9d22b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/adapter/AwsV1BindingSupport.java @@ -20,7 +20,6 @@ import java.io.IOException; import java.net.URI; -import java.util.concurrent.atomic.AtomicBoolean; import javax.annotation.Nullable; import org.slf4j.Logger; @@ -51,7 +50,7 @@ public final class AwsV1BindingSupport { /** * SDK availability. */ - private static final AtomicBoolean sdkAvailability = new AtomicBoolean(checkForAwsV1Sdk()); + private static final boolean SDK_V1_FOUND = checkForAwsV1Sdk(); private AwsV1BindingSupport() { } @@ -79,7 +78,7 @@ private static boolean checkForAwsV1Sdk() { * @return true if it was found in the classloader */ public static synchronized boolean isAwsV1SdkAvailable() { - return sdkAvailability.get(); + return SDK_V1_FOUND; } @@ -112,7 +111,7 @@ public static AwsCredentialsProvider createAWSV1CredentialProvider( @Nullable URI uri, final String key) throws IOException { if (!isAwsV1SdkAvailable()) { - throw unavailable(className, key, "No AWS v1 SDK available"); + throw unavailable(uri, className, key, "No AWS v1 SDK available"); } return V1ToV2AwsCredentialProviderAdapter.create(conf, className, uri); } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java index 0e3f2953710d7..c2ac8fe4c8197 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/AssumedRoleCredentialProvider.java @@ -34,16 +34,15 @@ import software.amazon.awssdk.services.sts.auth.StsAssumeRoleCredentialsProvider; import software.amazon.awssdk.services.sts.model.AssumeRoleRequest; import software.amazon.awssdk.services.sts.model.StsException; -import org.apache.hadoop.classification.VisibleForTesting; -import org.apache.hadoop.fs.PathIOException; -import org.apache.hadoop.util.Sets; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.classification.VisibleForTesting; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.AWSCredentialProviderList; import org.apache.hadoop.fs.s3a.CredentialInitializationException; import org.apache.hadoop.fs.s3a.Retries; @@ -52,6 +51,7 @@ import org.apache.hadoop.fs.s3a.S3ARetryPolicy; import org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider; import org.apache.hadoop.security.UserGroupInformation; +import org.apache.hadoop.util.Sets; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.auth.CredentialProviderListFactory.buildAWSProviderList; @@ -67,7 +67,7 @@ */ @InterfaceAudience.Public @InterfaceStability.Evolving -public class AssumedRoleCredentialProvider implements AwsCredentialsProvider, +public final class AssumedRoleCredentialProvider implements AwsCredentialsProvider, Closeable { private static final Logger LOG = @@ -157,6 +157,9 @@ public AssumedRoleCredentialProvider(@Nullable URI fsUri, 
 Configuration conf)

     // need to retry
     invoker = new Invoker(new S3ARetryPolicy(conf), this::operationRetried);
+    // and force in a fail-fast check just to keep the stack traces less
+    // convoluted
+    resolveCredentials();
   }

   /**
@@ -180,7 +183,7 @@ public AwsCredentials resolveCredentials() {
           "getCredentials failed: " + e,
           e);
     } catch (SdkClientException e) {
-      LOG.error("Failed to get credentials for role {}",
+      LOG.error("Failed to resolve credentials for role {}",
           arn, e);
       throw e;
     }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java
index 761a8258ad44e..d41d9522dd268 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java
@@ -225,7 +225,7 @@ public static AWSCredentialProviderList buildAWSProviderList(
      // are also blocked
      if (forbiddenClassnames.contains(className)) {
        throw new InstantiationIOException(InstantiationIOException.Kind.Forbidden,
-            className, key, E_FORBIDDEN_AWS_PROVIDER, null);
+            binding, className, key, E_FORBIDDEN_AWS_PROVIDER, null);
      }

      AwsCredentialsProvider provider;
@@ -270,7 +270,6 @@ public static AWSCredentialProviderList buildAWSProviderList(
    return providers;
  }

-
  /**
   * Create an AWS v2 credential provider from its class by using reflection.
   * @param conf configuration
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java
index 6faffb7eff519..8976fb865f6c7 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java
@@ -18,7 +18,11 @@

 package org.apache.hadoop.fs.s3a.impl;

-import java.io.IOException;
+import java.net.URI;
+
+import javax.annotation.Nullable;
+
+import org.apache.hadoop.fs.PathIOException;

 /**
 * An instantiation exception raised during reflection-based creation
 * Uses an enum of kind so tests/code can examine it, without
 * creating a full hierarchy of exception classes.
 */
-public class InstantiationIOException extends IOException {
+public class InstantiationIOException extends PathIOException {

  public static final String ABSTRACT_PROVIDER =
      "is abstract and therefore cannot be created";
@@ -68,12 +72,13 @@ public enum Kind {
  }

  public InstantiationIOException(
-      final Kind kind,
-      final String classname,
-      final String key,
-      final String message,
-      final Throwable cause) {
-    super("Class " + classname + " " + message
+      Kind kind,
+      @Nullable URI uri,
+      String classname,
+      @Nullable String key,
+      String message,
+      Throwable cause) {
+    super(uri != null ? uri.toString() : "",
+        "Class " + classname + " " + message
        + (key != null ? (" (configuration key " + key + ")") : ""),
        cause);
    this.kind = kind;
@@ -95,66 +100,80 @@ public String getKey() {

  /**
   * Class is abstract.
+   * @param uri URI of filesystem
   * @param classname classname.
   * @param key configuration key
   * @return an exception.
   */
-  public static InstantiationIOException isAbstract(String classname, String key) {
-    return new InstantiationIOException(Kind.IsAbstract, classname, key, ABSTRACT_PROVIDER, null);
+  public static InstantiationIOException isAbstract(URI uri, String classname, String key) {
+    return new InstantiationIOException(Kind.IsAbstract,
+        uri, classname, key, ABSTRACT_PROVIDER, null);
  }

  /**
   * Class does not implement the desired interface.
+   * @param uri URI of filesystem
   * @param classname classname.
   * @param interfaceName required interface
   * @param key configuration key
   * @return an exception.
   */
-  public static InstantiationIOException isNotInstanceOf(String classname,
+  public static InstantiationIOException isNotInstanceOf(
+      @Nullable URI uri,
+      String classname,
      String interfaceName,
      String key) {
-    return new InstantiationIOException(Kind.IsNotImplementation, classname,
+    return new InstantiationIOException(Kind.IsNotImplementation, uri, classname,
        key, DOES_NOT_IMPLEMENT + " " + interfaceName, null);
  }

  /**
-   * Class is unavailable for some reason, likely a missing dependency.
+   * Class is unavailable for some reason, probably a missing dependency.
+   * @param uri URI of filesystem
   * @param classname classname.
   * @param key configuration key
   * @param text error text
   * @return an exception.
   */
-  public static InstantiationIOException unavailable(String classname,
+  public static InstantiationIOException unavailable(
+      @Nullable URI uri,
+      String classname,
      String key,
      String text) {
    return new InstantiationIOException(Kind.Unavailable,
-        classname, key, text, null);
+        uri, classname, key, text, null);
  }

  /**
   * Failure to find a valid constructor (signature, visibility) or
   * factory method.
+   * @param uri URI of filesystem
   * @param classname classname.
   * @param key configuration key
   * @return an exception.
   */
-  public static InstantiationIOException unsupportedConstructor(String classname,
+  public static InstantiationIOException unsupportedConstructor(
+      @Nullable URI uri,
+      String classname,
      String key) {
    return new InstantiationIOException(Kind.UnsupportedConstructor,
-        classname, key, CONSTRUCTOR_EXCEPTION, null);
+        uri, classname, key, CONSTRUCTOR_EXCEPTION, null);
  }

  /**
   * General instantiation failure.
+   * @param uri URI of filesystem
   * @param classname classname.
   * @param key configuration key
   * @param t thrown
   * @return an exception.
   */
-  public static InstantiationIOException instantiationException(String classname,
+  public static InstantiationIOException instantiationException(
+      @Nullable URI uri,
+      String classname,
      String key,
      Throwable t) {
    return new InstantiationIOException(Kind.InstantiationFailure,
-        classname, key, INSTANTIATION_EXCEPTION + " " + t, t);
+        uri, classname, key, INSTANTIATION_EXCEPTION + " " + t, t);
  }

}
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md
index 739090b47c150..e6c088b2e7a31 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md
@@ -192,7 +192,7 @@ public interface AwsCredentialsProvider {
 2. The return type from delegation token binding has changed to support more class
    instances being returned in the future.

-`AWSCredentialProviderList` has been upgraded to the V2 API.
+`AWSCredentialProviderList` has been upgraded to the V2 API.
 * It still retains a `refresh()` method but this is now a deprecated no-op.
* It is still `Closeable`; its `close()` method iterates through all entries in the list; if they are `Closeable` or `AutoCloseable` then their `close()` method is invoked. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index 2507ae2f5104f..66808d52a5040 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -36,8 +36,10 @@ import static org.apache.hadoop.fs.contract.ContractTestUtils.writeDataset; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Constants.AWS_S3_ACCESSPOINT_REQUIRED; +import static org.apache.hadoop.fs.s3a.Constants.ENDPOINT; import static org.apache.hadoop.fs.s3a.Constants.FS_S3A; import static org.apache.hadoop.fs.s3a.Constants.S3A_BUCKET_PROBE; +import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -124,6 +126,10 @@ public static void expectUnknownStore( private Configuration createConfigurationWithProbe(final int probe) { Configuration conf = new Configuration(getFileSystem().getConf()); S3ATestUtils.disableFilesystemCaching(conf); + removeBaseAndBucketOverrides(conf, + S3A_BUCKET_PROBE, + ENDPOINT, + AWS_REGION); conf.setInt(S3A_BUCKET_PROBE, probe); conf.set(AWS_REGION, "eu-west-1"); return conf; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java index d3dfdd5aceaa8..7d604f2ef5149 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestAssumeRole.java @@ -40,7 +40,6 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.LocatedFileStatus; import org.apache.hadoop.fs.Path; -import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.AWSBadRequestException; import org.apache.hadoop.fs.s3a.AbstractS3ATestBase; @@ -53,6 +52,7 @@ import org.apache.hadoop.fs.s3a.commit.files.SinglePendingCommit; import org.apache.hadoop.fs.s3a.commit.impl.CommitContext; import org.apache.hadoop.fs.s3a.commit.impl.CommitOperations; +import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; import org.apache.hadoop.fs.s3a.s3guard.S3GuardTool; import org.apache.hadoop.fs.s3a.statistics.CommitterStatistics; @@ -190,7 +190,12 @@ public void testAssumedInvalidRole() throws Throwable { conf.set(ASSUMED_ROLE_ARN, ROLE_ARN_EXAMPLE); interceptClosing(StsException.class, "", - () -> new AssumedRoleCredentialProvider(uri, conf)); + () -> { + AssumedRoleCredentialProvider p = + new AssumedRoleCredentialProvider(uri, conf); + p.resolveCredentials(); + return p; + }); } @Test @@ -242,7 +247,7 @@ public void testAssumeRoleCannotAuthAssumedRole() throws Exception { conf.set(ASSUMED_ROLE_CREDENTIALS_PROVIDER, AssumedRoleCredentialProvider.NAME); expectFileSystemCreateFailure(conf, - PathIOException.class, + InstantiationIOException.class, E_FORBIDDEN_AWS_PROVIDER); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java index 86c9bb71cdad6..852f03ea618fd 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/RoleTestUtils.java @@ -151,6 +151,7 @@ public static Configuration newAssumedRoleConfig( final String roleARN) { Configuration conf = new Configuration(srcConf); removeBaseAndBucketOverrides(conf, + S3A_BUCKET_PROBE, DELEGATION_TOKEN_BINDING, ASSUMED_ROLE_ARN, AWS_CREDENTIALS_PROVIDER, @@ -159,6 +160,8 @@ public static Configuration newAssumedRoleConfig( conf.set(ASSUMED_ROLE_ARN, roleARN); conf.set(ASSUMED_ROLE_SESSION_NAME, "test"); conf.set(ASSUMED_ROLE_SESSION_DURATION, "15m"); + // force in bucket resolution during startup + conf.setInt(S3A_BUCKET_PROBE, 1); disableFilesystemCaching(conf); return conf; } From 4119dbca6ad0d9b79881a57ce96dc289dce3b1b3 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 17 May 2023 15:26:18 +0100 Subject: [PATCH 05/20] HADOOP-18742. AWS v2 SDK: stabilise dependencies with rest of hadoop libraries * explicit jackson declaration in hadoop-aws * cut jackson, eventstream and ion from aws SDK bundle dependencies * sdk-core excludes everything, as only the core interfaces/classes are needed to compile against * aws-crt is test scope. not sure about that. Change-Id: I47f6e10d42c8067df8255eca69799469d7252480 --- hadoop-project/pom.xml | 14 ++++++++++++++ hadoop-tools/hadoop-aws/pom.xml | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 642c80fc2f98f..1419b3a1ad473 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1132,6 +1132,12 @@ com.amazonaws aws-java-sdk-core ${aws-java-sdk.version} + + + software.amazon.ion + ion-java + + software.amazon.awssdk @@ -1142,6 +1148,14 @@ io.netty * + + software.amazon.eventstream + eventstream + + + com.fasterxml.jackson.dataformat + jackson-dataformat-cbor + diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml index 168cf4222d741..abe7314b9db02 100644 --- a/hadoop-tools/hadoop-aws/pom.xml +++ b/hadoop-tools/hadoop-aws/pom.xml @@ -504,6 +504,10 @@ test test-jar + + com.amazonaws aws-java-sdk-core From e5672ef3052f401e19c6943bf9731c932863d982 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 27 Jul 2023 11:20:47 +0100 Subject: [PATCH 06/20] HADOOP-18820. 
javadocs and dependencies * HADOOP-18812 list aws SDK v2 libraries in LICENSE-binary * all imports of aws-java-sdk-core excluded * javadoc and some other checkstyle issues reported in builds addressed (HADOOP-18819) Change-Id: I8043b157f18c325fa9599bdc4272f655307c0b6a --- LICENSE-binary | 2 +- hadoop-project/pom.xml | 4 ++-- .../fs/s3a/InconsistentS3ClientFactory.java | 2 +- .../fs/s3a/ProgressableProgressListener.java | 1 + .../auth/CredentialProviderListFactory.java | 4 ++-- .../auth/delegation/SessionTokenBinding.java | 1 + .../apache/hadoop/fs/s3a/impl/AWSHeaders.java | 18 +++++++++--------- .../fs/s3a/impl/InstantiationIOException.java | 1 + .../fs/s3a/audit/AbstractAuditingTest.java | 1 - .../hadoop/fs/s3a/select/StreamPublisher.java | 4 ++-- .../select/TestSelectEventStreamPublisher.java | 4 +++- 11 files changed, 23 insertions(+), 19 deletions(-) diff --git a/LICENSE-binary b/LICENSE-binary index 72318d9bf0a94..f63345070014a 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -215,7 +215,6 @@ com.aliyun:aliyun-java-sdk-kms:2.11.0 com.aliyun:aliyun-java-sdk-ram:3.1.0 com.aliyun:aliyun-java-sdk-sts:3.0.0 com.aliyun.oss:aliyun-sdk-oss:3.13.2 -com.amazonaws:aws-java-sdk-bundle:1.12.367 com.cedarsoftware:java-util:1.9.0 com.cedarsoftware:json-io:2.5.1 com.fasterxml.jackson.core:jackson-annotations:2.12.7 @@ -364,6 +363,7 @@ org.objenesis:objenesis:2.6 org.xerial.snappy:snappy-java:1.1.10.1 org.yaml:snakeyaml:2.0 org.wildfly.openssl:wildfly-openssl:1.1.3.Final +software.amazon.awssdk:bundle:jar:2.19.12 -------------------------------------------------------------------------------- diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 1419b3a1ad473..576e2e469d4cf 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -1134,8 +1134,8 @@ ${aws-java-sdk.version} - software.amazon.ion - ion-java + * + * diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java index d519c1c0763d8..8baf7a212f92a 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java @@ -65,7 +65,7 @@ private static class FailureInjectionInterceptor implements ExecutionInterceptor */ private final AtomicLong failureCounter = new AtomicLong(0); - FailureInjectionInterceptor(FailureInjectionPolicy policy) { + FailureInjectionInterceptor(FailureInjectionPolicy policy) { this.policy = policy; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java index b614b379bd6b2..7ee6c55c191b7 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/ProgressableProgressListener.java @@ -75,6 +75,7 @@ public void bytesTransferred(TransferListener.Context.BytesTransferred context) /** * Method to invoke after upload has completed. * This can handle race conditions in setup/teardown. + * @param upload upload which has just completed. 
* @return the number of bytes which were transferred after the notification */ public long uploadCompleted(ObjectTransfer upload) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java index d41d9522dd268..38c18afe57f80 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java @@ -96,7 +96,7 @@ public final class CredentialProviderListFactory { public static final String EC2_IAM_CREDENTIALS_V1 = "com.amazonaws.auth.InstanceProfileCredentialsProvider"; - /** V1 credential provider: {@value}. */ + /** V2 EC2 instance/container credential provider. */ public static final String EC2_IAM_CREDENTIALS_V2 = IAMInstanceCredentialsProvider.class.getName(); @@ -104,7 +104,7 @@ public final class CredentialProviderListFactory { public static final String ENVIRONMENT_CREDENTIALS_V1 = "com.amazonaws.auth.EnvironmentVariableCredentialsProvider"; - /** V1 credential provider: {@value}. */ + /** V2 environment variables credential provider: {@value}. */ public static final String ENVIRONMENT_CREDENTIALS_V2 = EnvironmentVariableCredentialsProvider.class.getName(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java index 5b26148f0526b..09a1ab1c46e77 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/SessionTokenBinding.java @@ -427,6 +427,7 @@ protected void setTokenIdentifier(Optional /** * The auth chain for the parent options. + * @return the parent authentication chain. */ protected AWSCredentialProviderList getParentAuthChain() { return parentAuthChain; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java index 3cb714588bd39..9f2ac229616a1 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java @@ -24,7 +24,7 @@ public interface AWSHeaders { /* - * Standard HTTP Headers + * Standard HTTP Headers. */ String CACHE_CONTROL = "Cache-Control"; @@ -40,22 +40,22 @@ public interface AWSHeaders { String LAST_MODIFIED = "Last-Modified"; /* - * Amazon HTTP Headers used by S3A + * Amazon HTTP Headers used by S3A. */ - /** S3's version ID header */ + /** S3's version ID header. */ String S3_VERSION_ID = "x-amz-version-id"; - /** Header describing what class of storage a user wants */ + /** Header describing what class of storage a user wants. */ String STORAGE_CLASS = "x-amz-storage-class"; - /** Header describing what archive tier the object is in, if any */ + /** Header describing what archive tier the object is in, if any. */ String ARCHIVE_STATUS = "x-amz-archive-status"; - /** Header for optional server-side encryption algorithm */ + /** Header for optional server-side encryption algorithm. 
*/ String SERVER_SIDE_ENCRYPTION = "x-amz-server-side-encryption"; - /** Range header for the get object request */ + /** Range header for the get object request. */ String RANGE = "Range"; /** @@ -65,10 +65,10 @@ public interface AWSHeaders { @Deprecated String CRYPTO_KEY = "x-amz-key"; - /** JSON-encoded description of encryption materials used during encryption */ + /** JSON-encoded description of encryption materials used during encryption. */ String MATERIALS_DESCRIPTION = "x-amz-matdesc"; - /** Header for the optional restore information of an object */ + /** Header for the optional restore information of an objec.t */ String RESTORE = "x-amz-restore"; /** diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java index 8976fb865f6c7..435db879fabf8 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/InstantiationIOException.java @@ -132,6 +132,7 @@ public static InstantiationIOException isNotInstanceOf( * @param uri URI of filesystem * @param classname classname. * @param key configuration key + * @param text text to include * @return an exception. */ public static InstantiationIOException unavailable( diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java index 5c33f19270ebb..e2297e37e50c4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/AbstractAuditingTest.java @@ -25,7 +25,6 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Map; -import java.util.function.Consumer; import java.util.stream.Collectors; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java index c770b8897338f..461aef726876c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/StreamPublisher.java @@ -35,12 +35,12 @@ final class StreamPublisher implements SdkPublisher { private final Iterator iterator; private Boolean done = false; - public StreamPublisher(Stream data, Executor executor) { + StreamPublisher(Stream data, Executor executor) { this.iterator = data.iterator(); this.executor = executor; } - public StreamPublisher(Stream data) { + StreamPublisher(Stream data) { this(data, Runnable::run); } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java index faf32fe4fd94d..fdf3b5b725376 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/select/TestSelectEventStreamPublisher.java @@ -156,7 +156,9 @@ public void handlesErrors() throws IOException { SelectObjectContentEventStream.recordsBuilder() .payload(SdkBytes.fromUtf8String("bar")) .build()) - .map(e -> { throw 
SdkException.create("error!", null); })); + .map(e -> { + throw SdkException.create("error!", null); + })); try (AbortableInputStream inputStream = selectEventStreamPublisher.toRecordsInputStream(e -> {})) { From d40b3fad1c1419cd9c68d4562428aead47da3331 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 27 Jul 2023 14:48:26 +0100 Subject: [PATCH 07/20] HADOOP-18830. S3 Select: deprecate and declare optional. Eventstream 1.0.1 JAR is used in tests but not in runtime; this is documented. * S3 Select documents move feature from "experimental" to "deprecated" * detail on why it doesn't work on big files Change-Id: I5b34526c97736bba621e897d1cf5b966a6f3af8b --- hadoop-project/pom.xml | 16 +++---- hadoop-tools/hadoop-aws/pom.xml | 5 +++ .../markdown/tools/hadoop-aws/s3_select.md | 43 ++++++++++++++++--- .../site/markdown/tools/hadoop-aws/testing.md | 3 ++ 4 files changed, 53 insertions(+), 14 deletions(-) diff --git a/hadoop-project/pom.xml b/hadoop-project/pom.xml index 576e2e469d4cf..809084cb3f7da 100644 --- a/hadoop-project/pom.xml +++ b/hadoop-project/pom.xml @@ -185,6 +185,7 @@ 1.12.367 2.7.1 2.19.12 + 1.0.1 0.21.0 1.11.2 2.1 @@ -1145,19 +1146,16 @@ ${aws-java-sdk-v2.version} - io.netty + * * - - software.amazon.eventstream - eventstream - - - com.fasterxml.jackson.dataformat - jackson-dataformat-cbor -
+
+  software.amazon.eventstream
+  eventstream
+  ${aws.evenstream.version}
+
   software.amazon.awssdk.crt
   aws-crt
diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index abe7314b9db02..b02332eca0a38 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -523,6 +523,11 @@
       aws-crt
       test
+
+      software.amazon.eventstream
+      eventstream
+      test
+
       org.assertj
       assertj-core
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md
index 886a2d97d246f..97c1ddde869b2 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md
@@ -14,7 +14,7 @@

 # S3 Select

-**Experimental Feature**
+**Deprecated Feature**

@@ -60,6 +60,20 @@ Record Readers.
 It's better here to directly use the Apache Spark, Hive, Impala, Flink or similar,
 which all use the latest ASF-supported libraries.

+## Dependencies: eventstream JAR
+
+To use S3 Select through the S3A connector, an extra JAR MUST be added to the classpath of your application,
+`eventstream-1.0.1.jar`.
+For command line tool use, this should be done by adding it to `share/hadoop/common/lib/`.
+
+```xml
+
+  software.amazon.eventstream
+  eventstream
+  1.0.1
+
+```
+
 ## Enabling/Disabling S3 Select

 S3 Select is enabled by default:
@@ -288,10 +302,12 @@ hadoop s3guard \
 ```

-## Use in MR/Analytics queries: Work in Progress
+## Use in MR/Analytics queries: Partially Supported

-S3 Select support in analytics queries is a work in progress. It does
-not work reliably with large source files where the work is split up.
+S3 Select support in analytics queries is only partially supported.
+It does not work reliably with large source files where the work is split up,
+and as the various query engines all assume that .csv and .json formats are splittable,
+things go very wrong, fast.

 As a proof of concept *only*, S3 Select queries can be made through
 MapReduce jobs which use any Hadoop `RecordReader`
@@ -663,6 +679,24 @@ to the `get()` call: do it.

 ## Troubleshooting

+### `NoClassDefFoundError: software/amazon/eventstream/MessageDecoder`
+
+Select operation failing with a missing evenstream class.
+
+```
+java.io.IOException: java.lang.NoClassDefFoundError: software/amazon/eventstream/MessageDecoder
+  at org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper.select(SelectObjectContentHelper.java:75)
+  at org.apache.hadoop.fs.s3a.WriteOperationHelper.lambda$select$10(WriteOperationHelper.java:660)
+  at org.apache.hadoop.fs.store.audit.AuditingFunctions.lambda$withinAuditSpan$0(AuditingFunctions.java:62)
+  at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:122)
+```
+
+The eventstream JAR is not on the classpath/not in sync with the version of the full "bundle.jar" SDK.
+
+Fix: get a compatible version of the JAR on the classpath.
+
+### SQL errors
+
 Getting S3 Select code to work is hard, though those knowledgeable in SQL
 will find it easier.

@@ -673,7 +707,6 @@ Problems can be split into:
 1. Datatype casting issues
 1. Bad records/data in source files.
 1. Failure to configure MR jobs to work correctly.
-1. Failure of MR jobs due to

 The exceptions here are all based on the experience
 during writing tests; more may surface with broader use.
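Downstream applications can turn the `NoClassDefFoundError` above into a clearer diagnostic with a preflight probe along these lines. This is a hypothetical sketch with invented class and method names, not anything shipped in the patch:

```java
/**
 * Hypothetical preflight check, not part of this patch: probe for the
 * eventstream decoder class so a missing JAR surfaces as a clear message
 * rather than a NoClassDefFoundError deep inside a select call.
 */
public final class EventstreamPreflight {

  private EventstreamPreflight() {
  }

  public static void requireEventstream() {
    try {
      // the class whose absence triggers the stack trace above
      Class.forName("software.amazon.eventstream.MessageDecoder");
    } catch (ClassNotFoundException e) {
      throw new IllegalStateException(
          "S3 Select requires eventstream-1.0.1.jar on the classpath", e);
    }
  }
}
```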
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
index 033bc241b6664..34ce7ea6034cc 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md
@@ -1287,10 +1287,13 @@ time bin/hadoop fs -copyToLocal -t 10 $BUCKET/\*aws\* tmp

 # ---------------------------------------------------
 # S3 Select on Landsat
+# this will fail with a ClassNotFoundException unless
+# eventstream JAR is added to the classpath
 # ---------------------------------------------------

 export LANDSATGZ=s3a://landsat-pds/scene_list.gz
+

 bin/hadoop s3guard select -header use -compression gzip $LANDSATGZ \
  "SELECT s.entityId,s.cloudCover FROM S3OBJECT s WHERE s.cloudCover < '0.0' LIMIT 100"

From 3f5181cbeee6505a228eab5816e4c9286f69c99a Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Thu, 27 Jul 2023 17:07:15 +0100
Subject: [PATCH 08/20] HADOOP-18820 reinstate aws-crt as mandatory; mention in
 troubleshooting

+ update troubleshooting to new package names

Change-Id: Ifbba1bf7188518aff9f58e911534ff600b958ad6
---
 LICENSE-binary                                  |  1 +
 hadoop-tools/hadoop-aws/pom.xml                 |  1 -
 .../tools/hadoop-aws/troubleshooting_s3a.md     | 47 ++++++++++++++++---
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/LICENSE-binary b/LICENSE-binary
index f63345070014a..851d839cdbd8d 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -364,6 +364,7 @@ org.xerial.snappy:snappy-java:1.1.10.1
 org.yaml:snakeyaml:2.0
 org.wildfly.openssl:wildfly-openssl:1.1.3.Final
 software.amazon.awssdk:bundle:jar:2.19.12
+software.amazon.awssdk.crt:aws-crt:0.21.0

--------------------------------------------------------------------------------

diff --git a/hadoop-tools/hadoop-aws/pom.xml b/hadoop-tools/hadoop-aws/pom.xml
index b02332eca0a38..3591ab4ea5a50 100644
--- a/hadoop-tools/hadoop-aws/pom.xml
+++ b/hadoop-tools/hadoop-aws/pom.xml
@@ -521,7 +521,6 @@
       software.amazon.awssdk.crt
       aws-crt
-      test
       software.amazon.eventstream

diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
index 3cd3bb43c5daa..f548b11ce8d62 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md
@@ -70,14 +70,45 @@ These are Hadoop filesystem client classes, found in the `hadoop-aws` JAR.
 An exception reporting this class as missing means that this JAR is not on
 the classpath.

-### `ClassNotFoundException: com.amazonaws.services.s3.AmazonS3Client`
-(or other `com.amazonaws` class.)
+### `NoClassDefFoundError: software/amazon/awssdk/crt/s3/S3MetaRequest`
+
+The library `aws-crt.jar` is not on the classpath. Its classes
+are not in the AWS `bundle.jar` file, yet are needed for uploading
+and renaming objects.
+
+Fix: add.
+ +``` +java.lang.BootstrapMethodError: java.lang.NoClassDefFoundError: software/amazon/awssdk/crt/s3/S3MetaRequest +at software.amazon.awssdk.services.s3.internal.crt.S3MetaRequestPauseObservable.(S3MetaRequestPauseObservable.java:33) +at software.amazon.awssdk.transfer.s3.internal.DefaultS3TransferManager.uploadFile(DefaultS3TransferManager.java:205) +at org.apache.hadoop.fs.s3a.S3AFileSystem.putObject(S3AFileSystem.java:3064) +at org.apache.hadoop.fs.s3a.S3AFileSystem.executePut(S3AFileSystem.java:4054) + +``` +### `ClassNotFoundException: software.amazon.awssdk.services.s3.S3Client` -This means that the `aws-java-sdk-bundle.jar` JAR is not on the classpath: +(or other `software.amazon` class.) + +This means that the AWS V2 SDK `bundle.jar` JAR is not on the classpath: add it. -### `java.lang.NoSuchMethodError` referencing a `com.amazonaws` class +### `ClassNotFoundException: com.amazonaws.auth.AWSCredentials` + +(or other `com.amazonaws` class.) + +With the move to the [V2 AWS SDK](../aws_sdk_upgrade.html), +the v1 SDK classes are no longer on the classpath. + +If this happens when trying to use a custom credential provider defined +in `fs.s3a.aws.credentials.provider`, then add the `aws-sdk-bundle.jar` +JAR to the classpath. + +If this happens in your own/third-party code, then again, add the JAR, +and/or consider moving to the v2 sdk yourself. + +### `java.lang.NoSuchMethodError` referencing a `software.amazon` class This can be triggered by incompatibilities between the AWS SDK on the classpath and the version which Hadoop was compiled with. @@ -86,12 +117,14 @@ The AWS SDK JARs change their signature enough between releases that the only way to safely update the AWS SDK version is to recompile Hadoop against the later version. -The sole fix is to use the same version of the AWS SDK with which Hadoop +The fix is to use the same version of the AWS SDK with which Hadoop was built. This can also be caused by having more than one version of an AWS SDK -JAR on the classpath. If the full `aws-java-sdk-bundle<` JAR is on the -classpath, do not add any of the `aws-sdk-` JARs. +JAR on the classpath. If the full `bundle.jar` JAR is on the +classpath, do not add any of the `aws-sdk-` JARs *except* for +`aws-crt.jar` (which is required) and +`eventstream.jar` which is required when using S3 Select. ### `java.lang.NoSuchMethodError` referencing an `org.apache.hadoop` class From b73afe2d1503941b3b96e8ec1340a10199f0403c Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 31 Jul 2023 16:37:48 +0100 Subject: [PATCH 09/20] HADOOP-18820. Mukund comments on docs Change-Id: Ie4c14a041595e17cc0e64ff65c2a8db6b60c2316 --- .../site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md | 5 ----- .../src/site/markdown/tools/hadoop-aws/s3_select.md | 10 +++++----- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index e6c088b2e7a31..38a5823e87656 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -165,11 +165,6 @@ be invoked when the provider chain was being shut down. #### v2 `AwsCredentialsProvider` interface -Note how the interface begins with the capitalized "AWS" acronym. -The v2 interface starts with "Aws". This is a very subtle change. 
-Compilers will detect and report the type mismatch, but it is not
-immediately obvious to developers migrating existing code.
-
 ```java
 public interface AwsCredentialsProvider {
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md
index 97c1ddde869b2..d18d07b9189af 100644
--- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md
+++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/s3_select.md
@@ -681,14 +681,14 @@ to the `get()` call: do it.

 ### `NoClassDefFoundError: software/amazon/eventstream/MessageDecoder`

-Select operation failing with a missing evenstream class.
+Select operation failing with a missing eventstream class.

 ```
 java.io.IOException: java.lang.NoClassDefFoundError: software/amazon/eventstream/MessageDecoder
-  at org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper.select(SelectObjectContentHelper.java:75)
-  at org.apache.hadoop.fs.s3a.WriteOperationHelper.lambda$select$10(WriteOperationHelper.java:660)
-  at org.apache.hadoop.fs.store.audit.AuditingFunctions.lambda$withinAuditSpan$0(AuditingFunctions.java:62)
-  at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:122)
+at org.apache.hadoop.fs.s3a.select.SelectObjectContentHelper.select(SelectObjectContentHelper.java:75)
+at org.apache.hadoop.fs.s3a.WriteOperationHelper.lambda$select$10(WriteOperationHelper.java:660)
+at org.apache.hadoop.fs.store.audit.AuditingFunctions.lambda$withinAuditSpan$0(AuditingFunctions.java:62)
+at org.apache.hadoop.fs.s3a.Invoker.once(Invoker.java:122)
 ```

 The eventstream JAR is not on the classpath/not in sync with the version of the full "bundle.jar" SDK.

From f8ecbbbced48681a38c8cea2d3cf3643ff92cf69 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Mon, 31 Jul 2023 17:42:16 +0100
Subject: [PATCH 10/20] HADOOP-18820. comment from HADOOP-18795 applied

Change-Id: I0e4a27ad7b32667c97d914fc8bd6ee52e755bf4c
---
 .../hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java
index 0d9b2d64b3ec3..f5c9c6267ce10 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/delegation/S3ADelegationTokens.java
@@ -120,11 +120,6 @@ public class S3ADelegationTokens extends AbstractDTService {
    */
   private AbstractDelegationTokenBinding tokenBinding;

-  /**
-   * List of cred providers; unset until {@link #bindToDelegationToken(Token)}.
-   */
-  //private Optional credentialProviders = Optional.empty();
-
   /**
    * delegation binding information; unset until {@link #bindToDelegationToken(Token)}.
   */

From 0cf85e06539e42082af9245193cc8ab7779ae497 Mon Sep 17 00:00:00 2001
From: Steve Loughran
Date: Tue, 1 Aug 2023 11:41:20 +0100
Subject: [PATCH 11/20] HADOOP-18820. javadoc and spotbug complaints.
Change-Id: I042b5036c4ad0505ea3c1d7a5f96ad35171104a2 --- .../org/apache/hadoop/fs/s3a/S3AFileSystem.java | 15 ++++++++++++--- .../s3a/auth/CredentialProviderListFactory.java | 2 +- .../org/apache/hadoop/fs/s3a/impl/AWSHeaders.java | 2 +- 3 files changed, 14 insertions(+), 5 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index 2409fe4bed715..c71839cc581ae 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -294,6 +294,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private Path workingDir; private String username; private S3Client s3Client; + /** Async client is used for transfer manager and s3 select. */ private S3AsyncClient s3AsyncClient; // initial callback policy is fail-once; it's there just to assist // some mock tests and other codepaths trying to call the low level @@ -986,7 +987,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { S3ClientFactory clientFactory = ReflectionUtils.newInstance(s3ClientFactoryClass, conf); s3Client = clientFactory.createS3Client(getUri(), parameters); createS3AsyncClient(clientFactory, parameters); - transferManager = clientFactory.createS3TransferManager(s3AsyncClient); + transferManager = clientFactory.createS3TransferManager(getS3AsyncClient()); } /** @@ -1201,6 +1202,14 @@ public RequestFactory getRequestFactory() { return requestFactory; } + /** + * Get the S3 Async client; synchronized to keep spotbugs quiet. + * @return the async s3 client. + */ + private synchronized S3AsyncClient getS3AsyncClient() { + return s3AsyncClient; + } + /** * Implementation of all operations used by delegation tokens. */ @@ -1761,7 +1770,7 @@ private final class WriteOperationHelperCallbacksImpl public CompletableFuture selectObjectContent( SelectObjectContentRequest request, SelectObjectContentResponseHandler responseHandler) { - return s3AsyncClient.selectObjectContent(request, responseHandler); + return getS3AsyncClient().selectObjectContent(request, responseHandler); } @Override @@ -4178,7 +4187,7 @@ public void close() throws IOException { protected synchronized void stopAllServices() { closeAutocloseables(LOG, transferManager, s3Client, - s3AsyncClient); + getS3AsyncClient()); transferManager = null; s3Client = null; s3AsyncClient = null; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java index 38c18afe57f80..443e7d8185642 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java @@ -104,7 +104,7 @@ public final class CredentialProviderListFactory { public static final String ENVIRONMENT_CREDENTIALS_V1 = "com.amazonaws.auth.EnvironmentVariableCredentialsProvider"; - /** V2 environment variables credential provider: {@value}. */ + /** V2 environment variables credential provider. 
*/ public static final String ENVIRONMENT_CREDENTIALS_V2 = EnvironmentVariableCredentialsProvider.class.getName(); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java index 9f2ac229616a1..e0d6fa5aecc0b 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/AWSHeaders.java @@ -68,7 +68,7 @@ public interface AWSHeaders { /** JSON-encoded description of encryption materials used during encryption. */ String MATERIALS_DESCRIPTION = "x-amz-matdesc"; - /** Header for the optional restore information of an objec.t */ + /** Header for the optional restore information of an object. */ String RESTORE = "x-amz-restore"; /** From 90abbde52c96865759881ffc7401b62473fa31e2 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 9 Aug 2023 21:03:03 +0100 Subject: [PATCH 12/20] HADOOP-18820. migration; cut back on V2Migration, remove InconsistentS3ClientFactory * V2Migration warnings cut back * remove InconsistentS3ClientFactory as obsolete * more details on migration based on fresh stack traces Change-Id: I8a83f8ba995035ed1079f288cdf5ada6e0057774 --- .../org/apache/hadoop/fs/s3a/Constants.java | 7 ++ .../fs/s3a/InconsistentS3ClientFactory.java | 111 ------------------ .../apache/hadoop/fs/s3a/S3AFileSystem.java | 12 +- .../org/apache/hadoop/fs/s3a/S3AUtils.java | 42 ------- .../hadoop/fs/s3a/auth/SignerFactory.java | 2 +- .../hadoop/fs/s3a/impl/V2Migration.java | 62 ++-------- .../tools/hadoop-aws/aws_sdk_upgrade.md | 88 +++++++++++++- .../site/markdown/tools/hadoop-aws/testing.md | 52 +------- .../tools/hadoop-aws/troubleshooting_s3a.md | 4 +- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 2 +- .../hadoop/fs/s3a/ITestS3ACannedACLs.java | 2 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 12 +- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 2 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 2 +- .../ITestDirectoryMarkerListing.java | 2 +- 15 files changed, 125 insertions(+), 277 deletions(-) delete mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 0d7e9966ecaca..8ed7017c4b9b2 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -727,11 +727,18 @@ private Constants() { public static final String STREAM_READ_GAUGE_INPUT_POLICY = "stream_read_gauge_input_policy"; + /** + * S3 Client Factory implementation class: {@value}. + * Unstable and incompatible between v1 and v2 SDK versions. + */ @InterfaceAudience.Private @InterfaceStability.Unstable public static final String S3_CLIENT_FACTORY_IMPL = "fs.s3a.s3.client.factory.impl"; + /** + * Default factory: {@value}. 
+ */ @InterfaceAudience.Private @InterfaceStability.Unstable public static final Class diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java deleted file mode 100644 index 8baf7a212f92a..0000000000000 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/InconsistentS3ClientFactory.java +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.hadoop.fs.s3a; - -import java.io.IOException; -import java.util.concurrent.atomic.AtomicLong; - -import software.amazon.awssdk.awscore.exception.AwsServiceException; -import software.amazon.awssdk.core.client.config.ClientOverrideConfiguration; -import software.amazon.awssdk.core.exception.SdkException; -import software.amazon.awssdk.core.interceptor.Context; -import software.amazon.awssdk.core.interceptor.ExecutionAttributes; -import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; - -import org.apache.hadoop.classification.InterfaceAudience; -import org.apache.hadoop.classification.InterfaceStability; -import org.apache.hadoop.conf.Configuration; - -/** - * S3 Client factory used for testing with eventual consistency fault injection. - * This client is for testing only; it is in the production - * {@code hadoop-aws} module to enable integration tests to use this - * just by editing the Hadoop configuration used to bring up the client. - * - * The factory injects an {@link ExecutionInterceptor} to inject failures. - */ -@InterfaceAudience.Private -@InterfaceStability.Unstable -public class InconsistentS3ClientFactory extends DefaultS3ClientFactory { - - @Override - protected ClientOverrideConfiguration createClientOverrideConfiguration( - S3ClientCreationParameters parameters, Configuration conf) throws IOException { - LOG.warn("** FAILURE INJECTION ENABLED. Do not run in production! **"); - LOG.warn("List inconsistency is no longer emulated; only throttling and read errors"); - return super.createClientOverrideConfiguration(parameters, conf) - .toBuilder() - .addExecutionInterceptor(new FailureInjectionInterceptor( - new FailureInjectionPolicy(conf))) - .build(); - } - - private static class FailureInjectionInterceptor implements ExecutionInterceptor { - - private final FailureInjectionPolicy policy; - - /** - * Counter of failures since last reset. 
- */ - private final AtomicLong failureCounter = new AtomicLong(0); - - FailureInjectionInterceptor(FailureInjectionPolicy policy) { - this.policy = policy; - } - - @Override - public void beforeExecution(Context.BeforeExecution context, - ExecutionAttributes executionAttributes) { - maybeFail(); - } - - private void maybeFail() { - maybeFail("throttled", 503); - } - - /** - * Conditionally fail the operation. - * @param errorMsg description of failure - * @param statusCode http status code for error - * @throws SdkException if the client chooses to fail - * the request. - */ - private void maybeFail(String errorMsg, int statusCode) - throws SdkException { - // code structure here is to line up for more failures later - AwsServiceException ex = null; - if (FailureInjectionPolicy.trueWithProbability(policy.getThrottleProbability())) { - // throttle the request - ex = AwsServiceException.builder() - .message(errorMsg + " count = " + (failureCounter.get() + 1)) - .statusCode(statusCode) - .build(); - } - - int failureLimit = policy.getFailureLimit(); - if (ex != null) { - long count = failureCounter.incrementAndGet(); - if (failureLimit == 0 - || (failureLimit > 0 && count < failureLimit)) { - throw ex; - } - } - } - } -} diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index c71839cc581ae..d59279abbea2f 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -926,7 +926,6 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { // with it if so. LOG.debug("Using delegation tokens"); - V2Migration.v1DelegationTokenCredentialProvidersUsed(); S3ADelegationTokens tokens = new S3ADelegationTokens(); this.delegationTokens = Optional.of(tokens); tokens.bindToFileSystem(getCanonicalUri(), @@ -1349,16 +1348,22 @@ public int getDefaultPort() { return 0; } + /** + * A log for warning of aws s3 client use; only logs once per process. + */ + private static final LogExactlyOnce AWS_CLIENT_LOG = new LogExactlyOnce(LOG); + /** * Returns the S3 client used by this filesystem. + * Will log once first, to discourage use. * Warning: this must only be used for testing, as it bypasses core * S3A operations. * @param reason a justification for requesting access. 
* @return S3Client */ @VisibleForTesting - public S3Client getAmazonS3ClientForTesting(String reason) { - LOG.warn("Access to S3 client requested, reason {}", reason); + public S3Client getAmazonS3V2ClientForTesting(String reason) { + AWS_CLIENT_LOG.warn("Access to S3 client requested, reason {}", reason); return s3Client; } @@ -2489,7 +2494,6 @@ public int getMaxKeys() { @Retries.RetryTranslated @InterfaceStability.Evolving public HeadObjectResponse getObjectMetadata(Path path) throws IOException { - V2Migration.v1GetObjectMetadataCalled(); return trackDurationAndSpan(INVOCATION_GET_FILE_STATUS, path, () -> getObjectMetadata(makeQualified(path), null, invoker, "getObjectMetadata")); diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java index eada648a2f75d..398a2c44ae198 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AUtils.java @@ -537,48 +537,6 @@ public static long dateToLong(final Date date) { return date.getTime(); } - /*** - * Creates an instance of a class using reflection. The - * class must implement one of the following means of construction, which are - * attempted in order: - * - *
 - * <ol>
 - * <li>a public constructor accepting java.net.URI and
 - * org.apache.hadoop.conf.Configuration</li>
 - * <li>a public constructor accepting
 - * org.apache.hadoop.conf.Configuration</li>
 - * <li>a public static method named as per methodName, that accepts no
 - * arguments and returns an instance of
 - * specified type, or</li>
 - * <li>a public default constructor.</li>
 - * </ol>
- * - * @param instanceClass Class for which instance is to be created - * @param conf configuration - * @param uri URI of the FS - * @param interfaceImplemented interface that this class implements - * @param methodName name of factory method to be invoked - * @param configKey config key under which this class is specified - * @param Instance of class - * @return instance of the specified class - * @throws IOException on any problem - */ - public static InstanceT getInstanceFromReflection( - Class instanceClass, - Configuration conf, - @Nullable URI uri, - Class interfaceImplemented, - String methodName, - String configKey) throws IOException { - - return getInstanceFromReflection(instanceClass.getName(), - conf, - uri, - interfaceImplemented, - methodName, - configKey); - } - /** * Creates an instance of a class using reflection. The * class must implement one of the following means of construction, which are diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java index 7beabb9fa3c84..c786086947fac 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/SignerFactory.java @@ -106,7 +106,7 @@ public static Signer createSigner(String signerType, String configKey) throws IO LOG.debug("Signer class is {}", className); Signer signer = - S3AUtils.getInstanceFromReflection(signerClass, null, null, Signer.class, "create", + S3AUtils.getInstanceFromReflection(className, null, null, Signer.class, "create", configKey); return signer; diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java index c9156f42047b2..51ccdb857a149 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java @@ -28,6 +28,16 @@ /** * This class provides utility methods required for migrating S3A to AWS Java SDK V2. * For more information on the upgrade, see HADOOP-18073. + * + *
+ * <p>
+ * in HADOOP-18382. Upgrade AWS SDK to V2 - Prerequisites,
+ * this class contained a series of `LogExactlyOnce` loggers to warn on
+ * the first use of a feature which would change incompatibly; this shipped in Hadoop 3.3.5.
+ * <p>
+ * With the move to v2 completed, attempts to use the v1 classes will fail,
+ * except for the special case of support for v1 credential providers.
+ * <p>
+ * The warning methods are still present, where appropriate, but downgraded to debug + * and only retained for debugging migration issues. */ public final class V2Migration { @@ -35,63 +45,25 @@ private V2Migration() { } public static final Logger SDK_V2_UPGRADE_LOG = LoggerFactory.getLogger(SDK_V2_UPGRADE_LOG_NAME); - private static final LogExactlyOnce WARN_ON_DELEGATION_TOKENS = - new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - - private static final LogExactlyOnce WARN_ON_GET_S3_CLIENT = - new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - private static final LogExactlyOnce WARN_OF_DIRECTLY_REFERENCED_CREDENTIAL_PROVIDER = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - private static final LogExactlyOnce WARN_OF_CUSTOM_SIGNER = - new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - private static final LogExactlyOnce WARN_OF_REQUEST_HANDLERS = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - private static final LogExactlyOnce WARN_ON_GET_OBJECT_METADATA = - new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - /** - * Warns on an AWS V1 credential provider being referenced directly. + * Notes an AWS V1 credential provider being referenced directly. * @param name name of the credential provider */ public static void v1ProviderReferenced(String name) { - WARN_OF_DIRECTLY_REFERENCED_CREDENTIAL_PROVIDER.warn( + WARN_OF_DIRECTLY_REFERENCED_CREDENTIAL_PROVIDER.debug( "Directly referencing AWS SDK V1 credential provider {}. AWS SDK V1 credential " + "providers will be removed once S3A is upgraded to SDK V2", name); } - /** - * Warns on the v1 s3 client being requested. - */ - public static void v1S3ClientRequested() { - WARN_ON_GET_S3_CLIENT.warn( - "getAmazonS3ClientForTesting() will be removed as part of upgrading S3A to AWS SDK V2"); - } /** - * Warns when v1 credential providers are used with delegation tokens. - */ - public static void v1DelegationTokenCredentialProvidersUsed() { - WARN_ON_DELEGATION_TOKENS.warn( - "The credential provider interface has changed in AWS SDK V2, custom credential " - + "providers used in delegation tokens binding classes will need to be updated once " - + "S3A is upgraded to SDK V2"); - } - - /** - * Warns on use of custom signers. - */ - public static void v1CustomSignerUsed() { - WARN_OF_CUSTOM_SIGNER.warn( - "The signer interface has changed in AWS SDK V2, custom signers will need to be updated " - + "once S3A is upgraded to SDK V2"); - } - - /** - * Warns on use of request handlers. + * Notes use of request handlers. */ public static void v1RequestHandlersUsed() { WARN_OF_REQUEST_HANDLERS.warn( @@ -99,12 +71,4 @@ public static void v1RequestHandlersUsed() { + "once S3A is upgraded to SDK V2"); } - /** - * Warns on use of getObjectMetadata. - */ - public static void v1GetObjectMetadataCalled() { - WARN_ON_GET_OBJECT_METADATA.warn("getObjectMetadata() called. This operation and it's response " - + "will be changed as part of upgrading S3A to AWS SDK V2"); - } - } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index 38a5823e87656..35a0f0254ebc4 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -129,6 +129,71 @@ See AWS SDK v2 issue [Simplify Modeled Message Marshalling #82](https://github.c note that it was filed in 2017, then implement your own workaround pending that issue being resolved. 
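The once-only logging used by `V2Migration` follows a simple check-and-set pattern; the sketch below illustrates the idea. It is an illustration of the pattern only, not the Hadoop `LogExactlyOnce` class itself:

```java
import java.util.concurrent.atomic.AtomicBoolean;

import org.slf4j.Logger;

/**
 * Sketch of a log-exactly-once wrapper: the first call logs at WARN,
 * every later call is a no-op. Illustrative only; Hadoop ships its
 * own LogExactlyOnce class.
 */
final class OnceOnlyLog {

  private final AtomicBoolean logged = new AtomicBoolean(false);

  private final Logger log;

  OnceOnlyLog(Logger log) {
    this.log = log;
  }

  void warn(String format, Object... args) {
    // compareAndSet guarantees exactly one thread performs the first log
    if (logged.compareAndSet(false, true)) {
      log.warn(format, args);
    }
  }
}
```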
+### Compilation/Linkage Errors
+
+Any code making use of v1 SDK classes will fail if it:
+* Expects the v1 SDK classes to be on the classpath when `hadoop-aws` is declared as a dependency.
+* Uses v1-SDK-compatible methods previously exported by the `S3AFileSystem` class and associated classes.
+* Tries to pass s3a classes to v1 SDK classes (e.g. credential providers).
+
+The sole solution to these problems is "move to the v2 SDK".
+
+Some `S3AUtils` methods have been deleted:
+```
+cannot find symbol
+[ERROR] symbol: method createAwsConf(org.apache.hadoop.conf.Configuration,java.lang.String)
+[ERROR] location: class org.apache.hadoop.fs.s3a.S3AUtils
+```
+
+The signature and superclass of `AWSCredentialProviderList` have changed, which can surface in different
+ways.
+
+Signature mismatch:
+```
+ cannot find symbol
+[ERROR] symbol: method getCredentials()
+[ERROR] location: variable credentials of type org.apache.hadoop.fs.s3a.AWSCredentialProviderList
+```
+
+It is no longer a V1 credential provider, so it cannot be used to pass credentials to a v1 SDK class:
+```
+incompatible types: org.apache.hadoop.fs.s3a.AWSCredentialProviderList cannot be converted to com.amazonaws.auth.AWSCredentialsProvider
+```
+
+### `AmazonS3` replaced by `S3Client`; factory and accessor changed
+
+The V1 s3 client class `com.amazonaws.services.s3.AmazonS3` has been superseded by `software.amazon.awssdk.services.s3.S3Client`.
+
+The `S3ClientFactory` interface has been replaced by one that creates a V2 `S3Client`.
+* Custom implementations will need to be updated.
+* The `InconsistentS3ClientFactory` class has been deleted.
+
+#### `S3AFileSystem` method changes
+
+##### `S3AFileSystem.getAmazonS3ClientForTesting()`
+
+The `S3AFileSystem.getAmazonS3ClientForTesting()` method has been deleted.
+
+Compilation:
+```
+cannot find symbol
+[ERROR] symbol: method getAmazonS3ClientForTesting(java.lang.String)
+[ERROR] location: variable fs of type org.apache.hadoop.fs.s3a.S3AFileSystem
+```
+
+It has been replaced with a new method to return the V2 `S3Client` of the filesystem instance.
+```java
+public S3Client getAmazonS3V2ClientForTesting(String reason);
+```
+
+##### `S3AFileSystem.getObjectMetadata(Path path)` returns a v2 `HeadObjectResponse`
+
+The `getObjectMetadata(Path)` call returns an instance of the
+`software.amazon.awssdk.services.s3.model.HeadObjectResponse` class.
+
 ### Credential Providers

 - Interface change: [com.amazonaws.auth.AWSCredentialsProvider](https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-core/src/main/java/com/amazonaws/auth/AWSCredentialsProvider.java)
@@ -147,7 +212,10 @@ The v2 interface starts with "Aws". This is a very subtle change
 for developers to spot.
 Compilers _will_ detect and report the type mismatch.
+
 ```java
+package com.amazonaws.auth;
+
 public interface AWSCredentialsProvider {

     public AWSCredentials getCredentials();
@@ -166,6 +234,8 @@ be invoked when the provider chain was being shut down.

 #### v2 `AwsCredentialsProvider` interface

 ```java
+package software.amazon.awssdk.auth.credentials;
+
 public interface AwsCredentialsProvider {

     AwsCredentials resolveCredentials();
@@ -180,6 +250,18 @@ public interface AwsCredentialsProvider {
 4. If the interface implements `Closeable` or `AutoCloseable`,
    these will be invoked when the provider chain is being shut down.

+#### `AWSCredentialProviderList` is now a V2 credential provider
+
+The class `org.apache.hadoop.fs.s3a.AWSCredentialProviderList` has moved from
+being a v1 to a v2 credential provider.
+Any code which obtains one of these lists, such as through a call to `S3AFileSystem.shareCredentials()` +may still link, but the v1 operations are no longer available. + +``` +java.lang.NoSuchMethodError: org.apache.hadoop.fs.s3a.AWSCredentialProviderList.getCredentials()Lcom/amazonaws/auth/AWSCredentials; + at org.apache.hadoop.fs.store.diag.S3ADiagnosticsInfo.validateFilesystem(S3ADiagnosticsInfo.java:903) + +``` ### Delegation Tokens @@ -194,14 +276,8 @@ the list; if they are `Closeable` or `AutoCloseable` then their `close()` method * Accordingly, providers may still perform background refreshes in separate threads; the S3A client will close its provider list when the filesystem itself is closed. -### AmazonS3 replaced by S3Client - -The s3 client is an instance of `S3Client` in V2 rather than `AmazonS3`. -For this reason, the `S3ClientFactory` will be deprecated and replaced by one that creates a V2 -`S3Client`. -The `getAmazonS3ClientForTesting()` method has been updated to return the `S3Client`. ### Signers diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md index 34ce7ea6034cc..bfec94b19c101 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/testing.md @@ -1004,9 +1004,6 @@ using an absolute XInclude reference to it. ## Failure Injection -**Warning do not enable any type of failure injection in production. The -following settings are for testing only.** - S3A provides an "Inconsistent S3 Client Factory" that can be used to simulate throttling by injecting random failures on S3 client requests. @@ -1018,55 +1015,8 @@ inconsistencies during testing of S3Guard. Now that S3 is consistent, injecting inconsistency is no longer needed during testing. -### Enabling the InconsistentS3CClientFactory - - -To enable the fault-injecting client via configuration, switch the -S3A client to use the "Inconsistent S3 Client Factory" when connecting to -S3: - -```xml - - fs.s3a.s3.client.factory.impl - org.apache.hadoop.fs.s3a.InconsistentS3ClientFactory - -``` - -The inconsistent client will, on every AWS SDK request, -generate a random number, and if less than the probability, -raise a 503 exception. - -```xml - - - fs.s3a.failinject.throttle.probability - 0.05 - -``` - -These exceptions are returned to S3; they do not test the -AWS SDK retry logic. - - -### Using the `InconsistentS3CClientFactory` in downstream integration tests - -The inconsistent client is shipped in the `hadoop-aws` JAR, so it can -be used in integration tests. - -## Testing S3Guard - -As part of the removal of S3Guard from the production code, the tests have been updated -so that - -* All S3Guard-specific tests have been deleted. -* All tests parameterized on S3Guard settings have had those test configurations removed. -* The maven profiles option to run tests with S3Guard have been removed. - -There is no need to test S3Guard -and so tests are lot faster. -(We developers are all happy) - -## Testing Assumed Roles +## Testing Assumed Roles Tests for the AWS Assumed Role credential provider require an assumed role to request. 
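For a concrete migration target, a minimal custom provider written against the v2 `AwsCredentialsProvider` interface quoted earlier looks roughly like this; the class name and hardcoded keys are placeholders for illustration only, and real providers should source their secrets securely:

```java
import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentials;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;

/**
 * Minimal example of a custom credential provider implementing the
 * v2 AwsCredentialsProvider interface; the hardcoded keys here are
 * placeholders for illustration only.
 */
public class ExampleV2CredentialsProvider implements AwsCredentialsProvider {

  @Override
  public AwsCredentials resolveCredentials() {
    // v2 equivalent of the v1 getCredentials() call
    return AwsBasicCredentials.create("access-key-id", "secret-access-key");
  }
}
```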
diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index f548b11ce8d62..ebb21f104d046 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -74,8 +74,8 @@ the classpath. ### `NoClassDefFoundError: software/amazon/awssdk/crt/s3/S3MetaRequest` The library `aws-crt.jar` is not on the classpath. Its classes -are not in the AWS `bundle.jar` file, yet are needed for uploading -and renaming objects. +are not in the AWS `bundle.jar` file, yet may be needed by some uses made +of the SDK. Fix: add. diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index d233081ee6851..e3600bd829b94 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -64,7 +64,7 @@ public void setup() throws Exception { // unset S3CSE property from config to avoid pathIOE. conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); fs.initialize(uri, conf); - s3 = fs.getAmazonS3ClientForTesting("mocking"); + s3 = fs.getAmazonS3V2ClientForTesting("mocking"); } public Configuration createConfiguration() { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java index 7ffb16833e044..63c2d0ccae22a 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java @@ -90,7 +90,7 @@ private void assertObjectHasLoggingGrant(Path path, boolean isFile) { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - S3Client s3 = fs.getAmazonS3ClientForTesting("acls"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("acls"); String key = storeContext.pathToKey(path); if (!isFile) { key = key + "/"; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index f7bdaa62422ed..459da5d211099 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -118,7 +118,7 @@ public void testEndpoint() throws Exception { } else { conf.set(Constants.ENDPOINT, endpoint); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = fs.getAmazonS3ClientForTesting("test endpoint"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("test endpoint"); String endPointRegion = ""; // Differentiate handling of "s3-" and "s3." 
based endpoint identifiers String[] endpointParts = StringUtils.split(endpoint, '.'); @@ -358,7 +358,7 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() try { fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3ClientForTesting("configuration"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("configuration"); assertNotNull(s3); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, @@ -393,7 +393,7 @@ public void testDefaultUserAgent() throws Exception { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3ClientForTesting("User Agent"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("User Agent"); assertNotNull(s3); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); @@ -408,7 +408,7 @@ public void testCustomUserAgent() throws Exception { conf.set(Constants.USER_AGENT_PREFIX, "MyApp"); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3ClientForTesting("User agent"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("User agent"); assertNotNull(s3); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); @@ -422,7 +422,7 @@ public void testRequestTimeout() throws Exception { conf = new Configuration(); conf.set(REQUEST_TIMEOUT, "120"); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = fs.getAmazonS3ClientForTesting("Request timeout (ms)"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("Request timeout (ms)"); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); assertEquals("Configured " + REQUEST_TIMEOUT + @@ -542,7 +542,7 @@ public void testS3SpecificSignerOverride() throws IOException { config.set(AWS_REGION, "eu-west-1"); fs = S3ATestUtils.createTestFileSystem(config); - S3Client s3Client = fs.getAmazonS3ClientForTesting("testS3SpecificSignerOverride"); + S3Client s3Client = fs.getAmazonS3V2ClientForTesting("testS3SpecificSignerOverride"); StsClient stsClient = STSClientFactory.builder(config, fs.getBucket(), new AnonymousAWSCredentialsProvider(), "", diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 6e85f6bc783dc..28625e5755d18 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -411,7 +411,7 @@ private static T verifyNoTrailingSlash(String role, T o) { */ private GetBucketEncryptionResponse getDefaultEncryption() throws IOException { S3AFileSystem fs = getFileSystem(); - S3Client s3 = fs.getAmazonS3ClientForTesting("check default encryption"); + S3Client s3 = fs.getAmazonS3V2ClientForTesting("check default encryption"); try { return Invoker.once("getBucketEncryption()", fs.getBucket(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java index 1f85c8fdef304..a89f1744fd2f9 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java @@ -75,7 +75,7 @@ public void testDeleteOnExit() throws Exception { // unset S3CSE property from config to avoid pathIOE. conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); testFs.initialize(uri, conf); - S3Client testS3 = testFs.getAmazonS3ClientForTesting("mocking"); + S3Client testS3 = testFs.getAmazonS3V2ClientForTesting("mocking"); Path path = new Path("/file"); String key = path.toUri().getPath().substring(1); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index de0048c25581c..9555e8316380c 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -214,7 +214,7 @@ protected Configuration createConfiguration() { public void setup() throws Exception { super.setup(); S3AFileSystem fs = getFileSystem(); - s3client = fs.getAmazonS3ClientForTesting("markers"); + s3client = fs.getAmazonS3V2ClientForTesting("markers"); bucket = fs.getBucket(); Path base = new Path(methodPath(), "base"); From 4553cb24ee6ad4c21fff646b588e6eb101bf2e56 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Mon, 14 Aug 2023 16:26:04 +0100 Subject: [PATCH 13/20] HADOOP-18820. SDK v1 cut: S3AInternals move "public" s3 client api calls out of S3AFS and into a new S3AInternals interface which is implemented by a non-static inner class. This * has accessor method getS3AInternals() * calls out that the methods are internal * allows for future refactoring to move the implementation out of s3a FileSystem class *completely* * provides a home for any other low-level calls we want to make public Change-Id: I4f5d36afeff8b989920daad188fbf379876f789f Note: kept S3AFileSystem.getObjectMetadata(Path) but tagged as deprecated. 
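An illustrative sketch of the calling pattern this commit introduces; the wrapper class
and method here are invented for the example, only `getS3AInternals()` and the
`S3AInternals` operations come from the patch itself:

```java
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.fs.s3a.S3AInternals;
import software.amazon.awssdk.services.s3.model.HeadObjectResponse;

/** Hypothetical caller: low-level probes now go through the S3AInternals accessor. */
public final class S3AInternalsUsage {
  public static void probe(FileSystem fs, Path path) throws Exception {
    S3AInternals internals = ((S3AFileSystem) fs).getS3AInternals();
    // audited, retrying entry points declared on the interface
    String region = internals.getBucketLocation();
    HeadObjectResponse head = internals.getObjectMetadata(path);
    System.out.println("bucket region " + region + "; etag " + head.eTag());
  }
}
```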
--- .../org/apache/hadoop/fs/s3a/Constants.java | 3 +- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 162 +++++++++++------- .../apache/hadoop/fs/s3a/S3AInternals.java | 99 +++++++++++ .../tools/hadoop-aws/aws_sdk_upgrade.md | 108 ++++++++---- .../hadoop/fs/s3a/AbstractS3AMockTest.java | 2 +- .../hadoop/fs/s3a/AbstractS3ATestBase.java | 8 + .../hadoop/fs/s3a/EncryptionTestUtils.java | 2 +- .../hadoop/fs/s3a/ITestS3ACannedACLs.java | 2 +- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 69 ++++---- .../ITestS3AEncryptionSSEKMSDefaultKey.java | 2 +- ...estS3AEncryptionWithDefaultS3Settings.java | 3 +- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 2 +- .../fs/s3a/ITestS3ATemporaryCredentials.java | 4 +- .../hadoop/fs/s3a/TestS3ADeleteOnExit.java | 2 +- .../hadoop/fs/s3a/auth/ITestCustomSigner.java | 2 +- .../ITestSessionDelegationInFilesystem.java | 2 +- .../s3a/commit/AbstractITCommitProtocol.java | 4 +- .../ITestDirectoryMarkerListing.java | 2 +- .../scale/ITestS3AHugeFilesStorageClass.java | 4 +- .../ITestAWSStatisticCollection.java | 4 +- 20 files changed, 348 insertions(+), 138 deletions(-) create mode 100644 hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java index 8ed7017c4b9b2..90510643a36ef 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/Constants.java @@ -737,7 +737,8 @@ private Constants() { "fs.s3a.s3.client.factory.impl"; /** - * Default factory: {@value}. + * Default factory: + * {@code org.apache.hadoop.fs.s3a.DefaultS3ClientFactory}. */ @InterfaceAudience.Private @InterfaceStability.Unstable diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index d59279abbea2f..e57663b41b998 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -147,7 +147,6 @@ import org.apache.hadoop.fs.s3a.impl.StatusProbeEnum; import org.apache.hadoop.fs.s3a.impl.StoreContext; import org.apache.hadoop.fs.s3a.impl.StoreContextBuilder; -import org.apache.hadoop.fs.s3a.impl.V2Migration; import org.apache.hadoop.fs.s3a.prefetch.S3APrefetchingInputStream; import org.apache.hadoop.fs.s3a.tools.MarkerToolOperations; import org.apache.hadoop.fs.s3a.tools.MarkerToolOperationsImpl; @@ -386,6 +385,7 @@ public class S3AFileSystem extends FileSystem implements StreamCapabilities, private AWSCredentialProviderList credentials; private SignerManager signerManager; + private S3AInternals s3aInternals; /** * Page size for deletions. @@ -534,6 +534,8 @@ public void initialize(URI name, Configuration originalConf) super.initialize(uri, conf); setConf(conf); + s3aInternals = createS3AInternals(); + // look for encryption data // DT Bindings may override this setEncryptionSecrets( @@ -1348,25 +1350,6 @@ public int getDefaultPort() { return 0; } - /** - * A log for warning of aws s3 client use; only logs once per process. - */ - private static final LogExactlyOnce AWS_CLIENT_LOG = new LogExactlyOnce(LOG); - - /** - * Returns the S3 client used by this filesystem. - * Will log once first, to discourage use. 
- * Warning: this must only be used for testing, as it bypasses core - * S3A operations. - * @param reason a justification for requesting access. - * @return S3Client - */ - @VisibleForTesting - public S3Client getAmazonS3V2ClientForTesting(String reason) { - AWS_CLIENT_LOG.warn("Access to S3 client requested, reason {}", reason); - return s3Client; - } - /** * Set the client -used in mocking tests to force in a different client. * @param client client. @@ -1378,45 +1361,106 @@ protected void setAmazonS3Client(S3Client client) { } /** - * Get the region of a bucket. - * Invoked from StoreContext; consider an entry point. - * @return the region in which a bucket is located - * @throws AccessDeniedException if the caller lacks permission. - * @throws IOException on any failure. + * S3AInternals method. + * {@inheritDoc}. */ + @AuditEntryPoint @Retries.RetryTranslated - @InterfaceAudience.LimitedPrivate("diagnostics") public String getBucketLocation() throws IOException { - return getBucketLocation(bucket); + return s3aInternals.getBucketLocation(bucket); } /** - * Get the region of a bucket; fixing up the region so it can be used - * in the builders of other AWS clients. - * TODO: Review. Used only for S3Guard? - * Requires the caller to have the AWS role permission - * {@code s3:GetBucketLocation}. - * Retry policy: retrying, translated. - * @param bucketName the name of the bucket - * @return the region in which a bucket is located - * @throws AccessDeniedException if the caller lacks permission. - * @throws IOException on any failure. + * Create the S3AInternals; left as something mocking + * subclasses may want to override. + * @return the internal implementation */ - @VisibleForTesting - @AuditEntryPoint - @Retries.RetryTranslated - public String getBucketLocation(String bucketName) throws IOException { - final String region = trackDurationAndSpan( - STORE_EXISTS_PROBE, bucketName, null, () -> - invoker.retry("getBucketLocation()", bucketName, true, () -> - // If accessPoint then region is known from Arn - accessPoint != null - ? accessPoint.getRegion() - : s3Client.getBucketLocation(GetBucketLocationRequest.builder() - .bucket(bucketName) - .build()) - .locationConstraintAsString())); - return fixBucketRegion(region); + protected S3AInternals createS3AInternals() { + return new S3AInternalsImpl(); + } + + /** + * Get the S3AInternals. + * @return the internal implementation + */ + public S3AInternals getS3AInternals() { + return s3aInternals; + } + + /** + * Implementation of the S3A Internals operations; pulled out of S3AFileSystem to + * force code accessing it to call {@link #getS3AInternals()}. + */ + private final class S3AInternalsImpl implements S3AInternals { + + /** + * A log for warning of aws s3 client use; only logs once per process. + */ + private final LogExactlyOnce AWS_CLIENT_LOG = new LogExactlyOnce(LOG); + + @Override + public S3Client getAmazonS3V2ClientForTesting(String reason) { + AWS_CLIENT_LOG.warn("Access to S3 client requested, reason {}", reason); + return s3Client; + } + + /** + * S3AInternals method. + * {@inheritDoc}. + */ + @Override + @AuditEntryPoint + @Retries.RetryTranslated + public String getBucketLocation() throws IOException { + return s3aInternals.getBucketLocation(bucket); + } + + /** + * S3AInternals method. + * {@inheritDoc}. 
+ */ + @Override + @AuditEntryPoint + @Retries.RetryTranslated + public String getBucketLocation(String bucketName) throws IOException { + final String region = trackDurationAndSpan( + STORE_EXISTS_PROBE, bucketName, null, () -> + invoker.retry("getBucketLocation()", bucketName, true, () -> + // If accessPoint then region is known from Arn + accessPoint != null + ? accessPoint.getRegion() + : s3Client.getBucketLocation(GetBucketLocationRequest.builder() + .bucket(bucketName) + .build()) + .locationConstraintAsString())); + return fixBucketRegion(region); + } + + /** + * S3AInternals method. + * {@inheritDoc}. + */ + @Override + @AuditEntryPoint + @Retries.RetryTranslated + public HeadObjectResponse getObjectMetadata(Path path) throws IOException { + return trackDurationAndSpan(INVOCATION_GET_FILE_STATUS, path, () -> + S3AFileSystem.this.getObjectMetadata(makeQualified(path), null, invoker, + "getObjectMetadata")); + } + + /** + * Get a shared copy of the AWS credentials, with its reference + * counter updated. + * Caller is required to call {@code close()} on this after + * they have finished using it. + * @param purpose what is this for? This is initially for logging + * @return a reference to shared credentials. + */ + public AWSCredentialProviderList shareCredentials(final String purpose) { + LOG.debug("Sharing credentials for: {}", purpose); + return credentials.share(); + } } /** @@ -1439,7 +1483,7 @@ public ChangeDetectionPolicy getChangeDetectionPolicy() { } /** - * Get the encryption algorithm of this endpoint. + * Get the encryption algorithm of this connector. * @return the encryption algorithm. */ public S3AEncryptionMethods getS3EncryptionAlgorithm() { @@ -1486,6 +1530,8 @@ private void initLocalDirAllocatorIfNotInitialized(Configuration conf) { * Get the bucket of this filesystem. * @return the bucket */ + @InterfaceAudience.Public + @InterfaceStability.Stable public String getBucket() { return bucket; } @@ -2483,20 +2529,17 @@ public int getMaxKeys() { * Low-level call to get at the object metadata. * This method is used in some external applications and so * must be viewed as a public entry point. - * Auditing: An audit entry point. + * @deprecated use S3AInternals API. * @param path path to the object. This will be qualified. * @return metadata * @throws IOException IO and object access problems. */ - @VisibleForTesting @AuditEntryPoint @InterfaceAudience.LimitedPrivate("utilities") @Retries.RetryTranslated - @InterfaceStability.Evolving + @Deprecated public HeadObjectResponse getObjectMetadata(Path path) throws IOException { - return trackDurationAndSpan(INVOCATION_GET_FILE_STATUS, path, () -> - getObjectMetadata(makeQualified(path), null, invoker, - "getObjectMetadata")); + return getS3AInternals().getObjectMetadata(path); } /** @@ -5582,4 +5625,5 @@ public boolean isCSEEnabled() { public boolean isMultipartUploadEnabled() { return isMultipartUploadEnabled; } + } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java new file mode 100644 index 0000000000000..03e950e8fc47c --- /dev/null +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.fs.s3a; + +import java.io.IOException; +import java.nio.file.AccessDeniedException; + +import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadObjectResponse; + +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.store.audit.AuditEntryPoint; + +/** + * This is an unstable interface for access to S3A Internal state, S3 operations + * and the S3 client connector itself. + */ +@InterfaceStability.Unstable +@InterfaceAudience.LimitedPrivate("testing/diagnostics") +public interface S3AInternals { + + /** + * Returns the S3 client used by this filesystem. + * Will log once first, to discourage use. + * Warning: this must only be used for testing, as it bypasses core + * S3A operations. + * Mocking note: this is the same s3client as is used by the owning + * filesystem; changes to this client will be reflected by changes + * in the behavior of that filesystem. + * @param reason a justification for requesting access. + * @return S3Client + */ + S3Client getAmazonS3V2ClientForTesting(String reason); + + /** + * Get the region of a bucket. + * Invoked from StoreContext; consider an entry point. + * @return the region in which a bucket is located + * @throws AccessDeniedException if the caller lacks permission. + * @throws IOException on any failure. + */ + @Retries.RetryTranslated + @AuditEntryPoint + String getBucketLocation() throws IOException; + + /** + * Get the region of a bucket; fixing up the region so it can be used + * in the builders of other AWS clients. + * Requires the caller to have the AWS role permission + * {@code s3:GetBucketLocation}. + * Retry policy: retrying, translated. + * @param bucketName the name of the bucket + * @return the region in which a bucket is located + * @throws AccessDeniedException if the caller lacks permission. + * @throws IOException on any failure. + */ + @AuditEntryPoint + @Retries.RetryTranslated + String getBucketLocation(String bucketName) throws IOException; + + /** + * Low-level call to get at the object metadata. + * Auditing: An audit entry point. + * @param path path to the object. This will be qualified. + * @return metadata + * @throws IOException IO and object access problems. + */ + @AuditEntryPoint + @Retries.RetryTranslated + HeadObjectResponse getObjectMetadata(Path path) throws IOException; + + /** + * Get a shared copy of the AWS credentials, with its reference + * counter updated. + * Caller is required to call {@code close()} on this after + * they have finished using it. + * @param purpose what is this for? This is for logging + * @return a reference to shared credentials. 
+ */ + AWSCredentialProviderList shareCredentials(String purpose); +} diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index 35a0f0254ebc4..caa86d657dcd2 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -45,7 +45,7 @@ A complete list of the changes can be found in the As the module name is lost, in hadoop releases a large JAR file with the name "bundle" is now part of the distribution. -This is the AWS v2 SDK shaded artifact. +This is the AWS V2 SDK shaded artifact. The new and old SDKs can co-exist; the only place that the hadoop code may still use the original SDK is when a non-standard V1 AWS credential @@ -64,13 +64,13 @@ or compatibility of dependent libraries. V1 Credential providers are *only* supported when the V1 SDK is on the classpath. -The standard set of v1 credential providers used in hadoop deployments are -automatically remapped to v2 equivalents, +The standard set of V1 credential providers used in hadoop deployments are +automatically remapped to V2 equivalents, while the stable hadoop providers have been upgraded in place; their names are unchanged. As result, standard cluster configurations should seamlessly upgrade. -| v1 Credential Provider | Remapped V2 substitute | +| V1 Credential Provider | Remapped V2 substitute | |-------------------------------------------------------------|----------------------------------------------------------------------------------| | `com.amazonaws.auth.AnonymousAWSCredentials` | `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` | | `com.amazonaws.auth.EnvironmentVariableCredentialsProvider` | `software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` | @@ -87,16 +87,16 @@ they are likely to have moved different factory/builder mechanisms. Identify the changed classes and use their names in the `fs.s3a.aws.credentials.provider` option. -If a v2 equivalent is not found; provided the v1 SDK is added to the classpath, +If a V2 equivalent is not found; provided the V1 SDK is added to the classpath, it should still be possible to use the existing classes. #### Private/third-party credential providers -Provided the v1 SDK is added to the classpath, +Provided the V1 SDK is added to the classpath, it should still be possible to use the existing classes. -Adding a v2 equivalent is the recommended long-term solution. +Adding a V2 equivalent is the recommended long-term solution. #### Private subclasses of the Hadoop credential providers @@ -125,18 +125,18 @@ were under `com.amazonaws`. Most of these changes simply create what will feel to be gratuitous migration effort; the removable of the `Serializable` nature from all message response classes can potentially break applications -such as anything passing them between Spark workers. -See AWS SDK v2 issue [Simplify Modeled Message Marshalling #82](https://github.com/aws/aws-sdk-java-v2/issues/82), +See AWS SDK V2 issue [Simplify Modeled Message Marshalling #82](https://github.com/aws/aws-sdk-java-v2/issues/82), note that it was filed in 2017, then implement your own workaround pending that issue being resolved. 
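+One possible workaround, sketched here (not part of this patch): copy the fields an
+application needs out of the non-`Serializable` V2 response into a `Serializable`
+value class of its own before handing it to Spark or any other serializing framework.
+The class below is hypothetical:
+
+```java
+import java.io.Serializable;
+import software.amazon.awssdk.services.s3.model.HeadObjectResponse;
+
+/** Hypothetical Serializable carrier for the metadata subset an application needs. */
+public final class HeadSummary implements Serializable {
+  private final String eTag;
+  private final Long contentLength;
+  private final String contentType;
+
+  public HeadSummary(HeadObjectResponse response) {
+    // V2 model classes use fluent accessors, not get* methods
+    this.eTag = response.eTag();
+    this.contentLength = response.contentLength();
+    this.contentType = response.contentType();
+  }
+
+  public String eTag() { return eTag; }
+  public Long contentLength() { return contentLength; }
+  public String contentType() { return contentType; }
+}
+```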
### Compilation/Linkage Errors

-Any code making use of v1 sdk classes will fail if they
-* Expect the v1 sdk classes to be on the classpath when `hadoop-aws` is declared as a dependency
-* Use v1-SDK-compatible methods previously exported by the `S3AFileSystem` class and associated classes.
-* Try to pass s3a classes to v1 SDK classes (e.g. credential providers).
+Any code making use of V1 SDK classes will fail if it:
+* Expects the V1 SDK classes to be on the classpath when `hadoop-aws` is declared as a dependency.
+* Uses V1-SDK-compatible methods previously exported by the `S3AFileSystem` class and associated classes.
+* Tries to pass s3a classes to V1 SDK classes (e.g. credential providers).

-The sole solution to these problems is "move to the v2 SDK".
+The sole solution to these problems is "move to the V2 SDK".

Some `S3AUtils` methods are deleted
```
@@ -155,24 +155,51 @@ Signature mismatch
 [ERROR]   location: variable credentials of type org.apache.hadoop.fs.s3a.AWSCredentialProviderList
 ```
-It is no longer a V1 credential provider, cannot be used to pass credentials to a v1 SDK class
+It is no longer a V1 credential provider, and cannot be used to pass credentials to a V1 SDK class:
 ```
 incompatible types: org.apache.hadoop.fs.s3a.AWSCredentialProviderList cannot be converted to com.amazonaws.auth.AWSCredentialsProvider
 ```

 ### `AmazonS3` replaced by `S3Client`; factory and accessor changed.

-The V1 s3 client class `com.amazonaws.services.s3.AmazonS3` has been superseded by `software.amazon.awssdk.services.s3.S3Client`
+The V1 s3 client class `com.amazonaws.services.s3.AmazonS3` has been superseded by
+`software.amazon.awssdk.services.s3.S3Client`.

 The `S3ClientFactory` interface has been replaced by one that creates a V2 `S3Client`.

 * Custom implementations will need to be updated.
 * The `InconsistentS3ClientFactory` class has been deleted.

+#### `S3AFileSystem` method changes: `S3AInternals`

+The low-level S3 operations/client accessors have been moved into a new interface,
+`org.apache.hadoop.fs.s3a.S3AInternals`, which must be accessed via the
+`S3AFileSystem.getS3AInternals()` method.
+They have also been updated to return V2 SDK classes.

-#### `S3AFileSystem` method changes

+```java
+@InterfaceStability.Unstable
+@InterfaceAudience.LimitedPrivate("testing/diagnostics")
+public interface S3AInternals {
+  S3Client getAmazonS3V2ClientForTesting(String reason);
+
+  @Retries.RetryTranslated
+  @AuditEntryPoint
+  String getBucketLocation() throws IOException;
+
+  @AuditEntryPoint
+  @Retries.RetryTranslated
+  String getBucketLocation(String bucketName) throws IOException;
+
+  @AuditEntryPoint
+  @Retries.RetryTranslated
+  HeadObjectResponse getObjectMetadata(Path path) throws IOException;
+
+  AWSCredentialProviderList shareCredentials(final String purpose);
+}
+```

-##### `S3AFileSystem.getAmazonS3ClientForTesting()`
+
+##### `S3AFileSystem.getAmazonS3ClientForTesting(String)` moved and return type changed

 The `S3AFileSystem.getAmazonS3ClientForTesting()` method has been deleted.

 ```
 cannot find symbol
 [ERROR]   symbol:   method getAmazonS3ClientForTesting(java.lang.String)
 [ERROR]   location: variable fs of type org.apache.hadoop.fs.s3a.S3AFileSystem
 ```

-It has been replaced with a new method to return the V2 `S3Client` of the filesystem instance.
+It has been replaced by an `S3AInternals` equivalent which returns the V2 `S3Client` of the filesystem instance.
+
+Before:
+
+```java
+((S3AFileSystem)fs).getAmazonS3ClientForTesting("testing")
+```
+
+After:
+
+```java
+((S3AFileSystem)fs).getS3AInternals().getAmazonS3V2ClientForTesting("testing")
+```

-##### `S3AFileSystem.getObjectMetadata(Path path)` returns a v2 HeadResponse
+##### `S3AFileSystem.getObjectMetadata(Path path)` moved and return type changed

 The `getObjectMetadata(Path)` call returns an instance of the
-`software.amazon.awssdk.services.s3.model.HeadObjectResponse` class
+`software.amazon.awssdk.services.s3.model.HeadObjectResponse` class.
+
+Before:
+
+```java
+((S3AFileSystem)fs).getObjectMetadata(path)
+```
+
+After:
+
+```java
+((S3AFileSystem)fs).getS3AInternals().getObjectMetadata(path)
+```
+
+The original `S3AFileSystem` method has been retained (and forwards to the new interface's
+implementation); however, its return type has changed and it is marked as deprecated.

+##### `AWSCredentialProviderList shareCredentials(String)` moved to `S3AInternals`
+
+The operation to share reference-counted access to the AWS credentials used
+by the S3A FS has been moved to `S3AInternals`.
+
+This is very much an implementation method, used to allow extension modules to share
+an authentication chain into other AWS SDK client services (DynamoDB, etc.).

 ### Credential Providers

@@ -205,10 +255,10 @@ The change in interface will mean that custom credential providers will need to
 implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` instead of
 `com.amazonaws.auth.AWSCredentialsProvider`.

-#### Original v1 `AWSCredentialsProvider` interface
+#### Original V1 `AWSCredentialsProvider` interface

 Note how the interface begins with the capitalized "AWS" acronym.
-The v2 interface starts with "Aws". This is a very subtle change
+The V2 interface starts with "Aws". This is a very subtle change
 for developers to spot.
 Compilers _will_ detect and report the type mismatch.

@@ -231,7 +281,7 @@ if available, would be invoked in preference to using any constructor.
 If the interface implemented `Closeable` or `AutoCloseable`, these would
 be invoked when the provider chain was being shut down.

-#### v2 `AwsCredentialsProvider` interface
+#### V2 `AwsCredentialsProvider` interface

 ```java
 package software.amazon.awssdk.auth.credentials;
@@ -253,14 +303,12 @@ public interface AwsCredentialsProvider {

 #### `AWSCredentialProviderList` is now a V2 credential provider

 The class `org.apache.hadoop.fs.s3a.AWSCredentialProviderList` has moved from
-being a v1 to a v2 credential provider.
-Any code which obtains one of these lists, such as through a call to `S3AFileSystem.shareCredentials()`
-may still link, but the v1 operations are no longer available.
+being a V1 to a V2 credential provider; even if an instance can be created with
+existing code, the V1 methods will not resolve:

 ```
 java.lang.NoSuchMethodError: org.apache.hadoop.fs.s3a.AWSCredentialProviderList.getCredentials()Lcom/amazonaws/auth/AWSCredentials;
- at org.apache.hadoop.fs.store.diag.S3ADiagnosticsInfo.validateFilesystem(S3ADiagnosticsInfo.java:903)
-
+ at org.apache.hadoop.fs.store.diag.S3ADiagnosticsInfo.validateFilesystem(S3ADiagnosticsInfo.java:903)
 ```

 ### Delegation Tokens

@@ -277,8 +325,6 @@ the list; if they are `Closeable` or `AutoCloseable` then their `close()` method
 * Accordingly, providers may still perform background refreshes in separate threads;
 the S3A client will close its provider list when the filesystem itself is closed.
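+
+To make the interface migration and the close-on-shutdown behaviour concrete, here is a
+hypothetical custom provider written against the V2 interface; the class name and the
+hard-coded secrets are illustrative only:
+
+```java
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+
+/** Hypothetical custom credential provider migrated to the V2 interface. */
+public class ExampleV2CredentialsProvider
+    implements AwsCredentialsProvider, AutoCloseable {
+
+  @Override
+  public AwsCredentials resolveCredentials() {
+    // all resolution/lookup work happens here; there is no refresh() in V2
+    return AwsBasicCredentials.create("example-access-key", "example-secret-key");
+  }
+
+  @Override
+  public void close() {
+    // invoked when the S3A provider list is closed with the filesystem
+  }
+}
+```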
- - ### Signers Interface change: [com.amazonaws.auth.Signer](https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-core/src/main/java/com/amazonaws/auth/Signer.java) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java index e3600bd829b94..b6da282e390a7 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3AMockTest.java @@ -64,7 +64,7 @@ public void setup() throws Exception { // unset S3CSE property from config to avoid pathIOE. conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); fs.initialize(uri, conf); - s3 = fs.getAmazonS3V2ClientForTesting("mocking"); + s3 = fs.getS3AInternals().getAmazonS3V2ClientForTesting("mocking"); } public Configuration createConfiguration() { diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java index e90ad8b73efae..93f41cfaa81bb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/AbstractS3ATestBase.java @@ -210,6 +210,14 @@ public S3AFileSystem getFileSystem() { return (S3AFileSystem) super.getFileSystem(); } + /** + * Get the {@link S3AInternals} internal access for the + * test filesystem. + * @return internals. + */ + public S3AInternals getS3AInternals() { + return getFileSystem().getS3AInternals(); + } /** * Describe a test in the logs. * @param text text to print diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java index 794480d4409fb..8d927dc957b16 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/EncryptionTestUtils.java @@ -69,7 +69,7 @@ public static void assertEncrypted(S3AFileSystem fs, final S3AEncryptionMethods algorithm, final String kmsKeyArn) throws IOException { - HeadObjectResponse md = fs.getObjectMetadata(path); + HeadObjectResponse md = fs.getS3AInternals().getObjectMetadata(path); String details = String.format( "file %s with encryption algorithm %s and key %s", path, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java index 63c2d0ccae22a..6924065dbeeb4 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ACannedACLs.java @@ -90,7 +90,7 @@ private void assertObjectHasLoggingGrant(Path path, boolean isFile) { S3AFileSystem fs = getFileSystem(); StoreContext storeContext = fs.createStoreContext(); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("acls"); + S3Client s3 = getS3AInternals().getAmazonS3V2ClientForTesting("acls"); String key = storeContext.pathToKey(path); if (!isFile) { key = key + "/"; diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index 
459da5d211099..f47e498edf9bb 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -7,7 +7,7 @@ * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -18,6 +18,18 @@ package org.apache.hadoop.fs.s3a; +import java.io.File; +import java.io.IOException; +import java.net.URI; +import java.security.PrivilegedExceptionAction; + +import org.assertj.core.api.Assertions; +import org.junit.Rule; +import org.junit.Test; +import org.junit.rules.TemporaryFolder; +import org.junit.rules.Timeout; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import software.amazon.awssdk.core.client.config.SdkClientConfiguration; import software.amazon.awssdk.core.client.config.SdkClientOption; import software.amazon.awssdk.core.interceptor.ExecutionAttributes; @@ -38,28 +50,15 @@ import org.apache.hadoop.fs.contract.ContractTestUtils; import org.apache.hadoop.fs.s3a.auth.STSClientFactory; import org.apache.hadoop.fs.s3native.S3xLoginHelper; -import org.apache.hadoop.test.GenericTestUtils; - -import org.assertj.core.api.Assertions; -import org.junit.Rule; -import org.junit.Test; -import org.junit.rules.Timeout; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; -import java.io.File; -import java.net.URI; -import java.security.PrivilegedExceptionAction; - import org.apache.hadoop.security.ProviderUtils; import org.apache.hadoop.security.UserGroupInformation; import org.apache.hadoop.security.alias.CredentialProvider; import org.apache.hadoop.security.alias.CredentialProviderFactory; +import org.apache.hadoop.test.GenericTestUtils; import org.apache.hadoop.util.VersionInfo; import org.apache.http.HttpStatus; -import org.junit.rules.TemporaryFolder; +import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.Constants.*; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; @@ -91,6 +90,23 @@ public class ITestS3AConfiguration { @Rule public final TemporaryFolder tempDir = new TemporaryFolder(); + /** + * Get the S3 client of the active filesystem. + * @param reason why? + * @return the client + */ + private final S3Client getS3Client(String reason) { + return requireNonNull(getS3AInternals().getAmazonS3V2ClientForTesting(reason)); + } + + /** + * Get the internals of the active filesystem. + * @return the internals + */ + private S3AInternals getS3AInternals() { + return fs.getS3AInternals(); + } + /** * Test if custom endpoint is picked up. *

@@ -118,7 +134,7 @@ public void testEndpoint() throws Exception { } else { conf.set(Constants.ENDPOINT, endpoint); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("test endpoint"); + S3Client s3 = getS3Client("test endpoint"); String endPointRegion = ""; // Differentiate handling of "s3-" and "s3." based endpoint identifiers String[] endpointParts = StringUtils.split(endpoint, '.'); @@ -129,9 +145,7 @@ public void testEndpoint() throws Exception { } else { fail("Unexpected endpoint"); } - // TODO: review way to get the bucket region. - String region = s3.getBucketLocation(b -> b.bucket(fs.getUri().getHost())) - .locationConstraintAsString(); + String region = getS3AInternals().getBucketLocation(); assertEquals("Endpoint config setting and bucket location differ: ", endPointRegion, region); } @@ -358,8 +372,7 @@ public void shouldBeAbleToSwitchOnS3PathStyleAccessViaConfigProperty() try { fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("configuration"); - assertNotNull(s3); + S3Client s3 = getS3Client("configuration"); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); @@ -393,8 +406,7 @@ public void testDefaultUserAgent() throws Exception { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("User Agent"); - assertNotNull(s3); + S3Client s3 = getS3Client("User Agent"); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); Assertions.assertThat(clientConfiguration.option(SdkClientOption.CLIENT_USER_AGENT)) @@ -408,8 +420,7 @@ public void testCustomUserAgent() throws Exception { conf.set(Constants.USER_AGENT_PREFIX, "MyApp"); fs = S3ATestUtils.createTestFileSystem(conf); assertNotNull(fs); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("User agent"); - assertNotNull(s3); + S3Client s3 = getS3Client("User agent"); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); Assertions.assertThat(clientConfiguration.option(SdkClientOption.CLIENT_USER_AGENT)) @@ -422,7 +433,7 @@ public void testRequestTimeout() throws Exception { conf = new Configuration(); conf.set(REQUEST_TIMEOUT, "120"); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("Request timeout (ms)"); + S3Client s3 = getS3Client("Request timeout (ms)"); SdkClientConfiguration clientConfiguration = getField(s3, SdkClientConfiguration.class, "clientConfiguration"); assertEquals("Configured " + REQUEST_TIMEOUT + @@ -436,7 +447,7 @@ public void testCloseIdempotent() throws Throwable { conf = new Configuration(); fs = S3ATestUtils.createTestFileSystem(conf); AWSCredentialProviderList credentials = - fs.shareCredentials("testCloseIdempotent"); + getS3AInternals().shareCredentials("testCloseIdempotent"); credentials.close(); fs.close(); assertTrue("Closing FS didn't close credentials " + credentials, @@ -542,7 +553,7 @@ public void testS3SpecificSignerOverride() throws IOException { config.set(AWS_REGION, "eu-west-1"); fs = S3ATestUtils.createTestFileSystem(config); - S3Client s3Client = fs.getAmazonS3V2ClientForTesting("testS3SpecificSignerOverride"); + S3Client s3Client = getS3Client("testS3SpecificSignerOverride"); StsClient stsClient = STSClientFactory.builder(config, fs.getBucket(), new AnonymousAWSCredentialsProvider(), 
"", diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java index 3a72206641452..7e399f347100f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionSSEKMSDefaultKey.java @@ -51,7 +51,7 @@ protected S3AEncryptionMethods getSSEAlgorithm() { @Override protected void assertEncrypted(Path path) throws IOException { - HeadObjectResponse md = getFileSystem().getObjectMetadata(path); + HeadObjectResponse md = getS3AInternals().getObjectMetadata(path); assertEquals("SSE Algorithm", EncryptionTestUtils.AWS_KMS_SSE_ALGORITHM, md.serverSideEncryptionAsString()); assertThat(md.ssekmsKeyId(), containsString("arn:aws:kms:")); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java index 95ceae608e17b..1b25846fafddf 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEncryptionWithDefaultS3Settings.java @@ -118,7 +118,8 @@ public void testEncryptionOverRename() throws Throwable { S3AFileSystem fs = getFileSystem(); Path path = path(getMethodName() + "find-encryption-algo"); ContractTestUtils.touch(fs, path); - String sseAlgorithm = fs.getObjectMetadata(path).serverSideEncryptionAsString(); + String sseAlgorithm = getS3AInternals().getObjectMetadata(path) + .serverSideEncryptionAsString(); if(StringUtils.isBlank(sseAlgorithm) || !sseAlgorithm.equals(AWS_KMS_SSE_ALGORITHM)) { skip("Test bucket is not configured with " + AWS_KMS_SSE_ALGORITHM); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 28625e5755d18..285e6b62bc563 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -411,7 +411,7 @@ private static T verifyNoTrailingSlash(String role, T o) { */ private GetBucketEncryptionResponse getDefaultEncryption() throws IOException { S3AFileSystem fs = getFileSystem(); - S3Client s3 = fs.getAmazonS3V2ClientForTesting("check default encryption"); + S3Client s3 = getS3AInternals().getAmazonS3V2ClientForTesting("check default encryption"); try { return Invoker.once("getBucketEncryption()", fs.getBucket(), diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java index 35bb709f659f9..290a4d995c757 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ATemporaryCredentials.java @@ -115,7 +115,7 @@ protected Configuration createConfiguration() { public void testSTS() throws IOException { Configuration conf = getContract().getConf(); S3AFileSystem testFS = getFileSystem(); - credentials = 
testFS.shareCredentials("testSTS"); + credentials = getS3AInternals().shareCredentials("testSTS"); String bucket = testFS.getBucket(); StsClientBuilder builder = STSClientFactory.builder( @@ -363,7 +363,7 @@ public E expectedSessionRequestFailure( final String region, final String exceptionText) throws Exception { try(AWSCredentialProviderList parentCreds = - getFileSystem().shareCredentials("test"); + getS3AInternals().shareCredentials("test"); DurationInfo ignored = new DurationInfo(LOG, "requesting credentials")) { Configuration conf = new Configuration(getContract().getConf()); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java index a89f1744fd2f9..0ec96624ef171 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/TestS3ADeleteOnExit.java @@ -75,7 +75,7 @@ public void testDeleteOnExit() throws Exception { // unset S3CSE property from config to avoid pathIOE. conf.unset(Constants.S3_ENCRYPTION_ALGORITHM); testFs.initialize(uri, conf); - S3Client testS3 = testFs.getAmazonS3V2ClientForTesting("mocking"); + S3Client testS3 = testFs.getS3AInternals().getAmazonS3V2ClientForTesting("mocking"); Path path = new Path("/file"); String key = path.toUri().getPath().substring(1); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java index cdf89211fd7fc..ad7d59a7319cf 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/ITestCustomSigner.java @@ -152,7 +152,7 @@ private Configuration createTestConfig(String identifier) { } private String determineRegion(String bucketName) throws IOException { - return getFileSystem().getBucketLocation(bucketName); + return getS3AInternals().getBucketLocation(bucketName); } @Private diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java index 28784b17c9ce8..ebad90336f7d0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/auth/delegation/ITestSessionDelegationInFilesystem.java @@ -582,7 +582,7 @@ public void testDelegationBindingMismatch2() throws Throwable { protected HeadBucketResponse readLandsatMetadata(final S3AFileSystem delegatedFS) throws Exception { AWSCredentialProviderList testingCreds - = delegatedFS.shareCredentials("testing"); + = delegatedFS.getS3AInternals().shareCredentials("testing"); URI landsat = new URI(DEFAULT_CSVTEST_FILE); DefaultS3ClientFactory factory diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java index 44854e84d894c..258c34b5cb84f 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java +++ 
b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/commit/AbstractITCommitProtocol.java @@ -741,8 +741,8 @@ private void validateContent(Path dir, */ private void validateStorageClass(Path dir, String expectedStorageClass) throws Exception { Path expectedFile = getPart0000(dir); - S3AFileSystem fs = getFileSystem(); - String actualStorageClass = fs.getObjectMetadata(expectedFile).storageClassAsString(); + String actualStorageClass = getS3AInternals().getObjectMetadata(expectedFile) + .storageClassAsString(); Assertions.assertThat(actualStorageClass) .describedAs("Storage class of object %s", expectedFile) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java index 9555e8316380c..4e02d01458fb2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/performance/ITestDirectoryMarkerListing.java @@ -214,7 +214,7 @@ protected Configuration createConfiguration() { public void setup() throws Exception { super.setup(); S3AFileSystem fs = getFileSystem(); - s3client = fs.getAmazonS3V2ClientForTesting("markers"); + s3client = getS3AInternals().getAmazonS3V2ClientForTesting("markers"); bucket = fs.getBucket(); Path base = new Path(methodPath(), "base"); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java index 006c989604fd7..ccc71c58644f2 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/scale/ITestS3AHugeFilesStorageClass.java @@ -125,8 +125,8 @@ private void skipQuietly(String text) { } protected void assertStorageClass(Path hugeFile) throws IOException { - S3AFileSystem fs = getFileSystem(); - String actual = fs.getObjectMetadata(hugeFile).storageClassAsString(); + + String actual = getS3AInternals().getObjectMetadata(hugeFile).storageClassAsString(); assertTrue( "Storage class of object is " + actual + ", expected " + STORAGE_CLASS_REDUCED_REDUNDANCY, diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/statistics/ITestAWSStatisticCollection.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/statistics/ITestAWSStatisticCollection.java index e7696996dbd1a..8c97d896edbde 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/statistics/ITestAWSStatisticCollection.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/statistics/ITestAWSStatisticCollection.java @@ -53,7 +53,7 @@ public void testLandsatStatistics() throws Throwable { conf.unset("fs.s3a.bucket.landsat-pds.endpoint"); try (S3AFileSystem fs = (S3AFileSystem) path.getFileSystem(conf)) { - fs.getObjectMetadata(path); + fs.getS3AInternals().getObjectMetadata(path); IOStatistics iostats = fs.getIOStatistics(); assertThatStatisticCounter(iostats, STORE_IO_REQUEST.getSymbol()) @@ -71,7 +71,7 @@ public void testCommonCrawlStatistics() throws Throwable { conf.set(ENDPOINT, DEFAULT_ENDPOINT); try (S3AFileSystem fs = (S3AFileSystem) path.getFileSystem(conf)) { - fs.getObjectMetadata(path); + fs.getS3AInternals().getObjectMetadata(path); 
IOStatistics iostats = fs.getIOStatistics(); assertThatStatisticCounter(iostats, STORE_IO_REQUEST.getSymbol()) From 99273ebf497ca9c0b60dad3873a7a22d4b286014 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 15 Aug 2023 14:26:36 +0100 Subject: [PATCH 14/20] HADOOP-18820. checkstyle, spotbugs, feedback and doc/xml updates code changes * removed public S3AFileSystem.getObjectMetadata * com.amazonaws.auth.profile.ProfileCredentialsProvider remapped to v2 equivalent. xml changes * core-default `fs.s3a.aws.credentials.provider` list updated to v2 * description section thinned out; site docs contains this and is up to date * repeat for all examples of the xml setting in the site docs. site doc changes * moved credential provider section above code changes in upgrading doc * updated index.md, assumed_roles.md to match changes Change-Id: Ic39bb19d807d03bf1cf4769981af391072c86069 --- .../src/main/resources/core-default.xml | 52 +--- .../dev-support/findbugs-exclude.xml | 5 + .../apache/hadoop/fs/s3a/S3AFileSystem.java | 11 +- .../auth/CredentialProviderListFactory.java | 16 +- .../auth/IAMInstanceCredentialsProvider.java | 2 +- .../tools/hadoop-aws/assumed_roles.md | 2 +- .../tools/hadoop-aws/aws_sdk_upgrade.md | 225 ++++++++++-------- .../tools/hadoop-aws/delegation_tokens.md | 2 +- .../site/markdown/tools/hadoop-aws/index.md | 144 ++++------- .../tools/hadoop-aws/troubleshooting_s3a.md | 48 ---- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 2 +- 11 files changed, 211 insertions(+), 298 deletions(-) diff --git a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml index 5f841bd233d34..8cc44e6990b18 100644 --- a/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml +++ b/hadoop-common-project/hadoop-common/src/main/resources/core-default.xml @@ -1241,17 +1241,24 @@ AWS secret key used by S3A file system. Omit for IAM role-based or provider-based authentication. + + fs.s3a.session.token + Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider + as one of the providers. + + + fs.s3a.aws.credentials.provider org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - com.amazonaws.auth.EnvironmentVariableCredentialsProvider, + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider, org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider Comma-separated class names of credential provider classes which implement - com.amazonaws.auth.AWSCredentialsProvider. + software.amazon.awssdk.auth.credentials.AwsCredentialsProvider. When S3A delegation tokens are not enabled, this list will be used to directly authenticate with S3 and other AWS services. @@ -1259,43 +1266,6 @@ token binding it may be used to communicate wih the STS endpoint to request session/role credentials. - - These are loaded and queried in sequence for a valid set of credentials. - Each listed class must implement one of the following means of - construction, which are attempted in order: - * a public constructor accepting java.net.URI and - org.apache.hadoop.conf.Configuration, - * a public constructor accepting org.apache.hadoop.conf.Configuration, - * a public static method named getInstance that accepts no - arguments and returns an instance of - com.amazonaws.auth.AWSCredentialsProvider, or - * a public default constructor. 
- - Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows - anonymous access to a publicly accessible S3 bucket without any credentials. - Please note that allowing anonymous access to an S3 bucket compromises - security and therefore is unsuitable for most use cases. It can be useful - for accessing public data sets without requiring AWS credentials. - - If unspecified, then the default list of credential provider classes, - queried in sequence, is: - * org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider: looks - for session login secrets in the Hadoop configuration. - * org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider: - Uses the values of fs.s3a.access.key and fs.s3a.secret.key. - * com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports - configuration of AWS access key ID and secret access key in - environment variables named AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, - and AWS_SESSION_TOKEN as documented in the AWS SDK. - * org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider: picks up - IAM credentials of any EC2 VM or AWS container in which the process is running. - - - - - fs.s3a.session.token - Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider - as one of the providers. @@ -1393,10 +1363,10 @@ Note: for job submission to actually collect these tokens, Kerberos must be enabled. - Options are: + Bindings available in hadoop-aws are: org.apache.hadoop.fs.s3a.auth.delegation.SessionTokenBinding org.apache.hadoop.fs.s3a.auth.delegation.FullCredentialsTokenBinding - and org.apache.hadoop.fs.s3a.auth.delegation.RoleTokenBinding + org.apache.hadoop.fs.s3a.auth.delegation.RoleTokenBinding diff --git a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml index 39a9e51ac8125..359ac0e80dd1b 100644 --- a/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml +++ b/hadoop-tools/hadoop-aws/dev-support/findbugs-exclude.xml @@ -64,6 +64,11 @@ + + + + + diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index e57663b41b998..cc9c173689fe4 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -999,7 +999,7 @@ private void bindAWSClient(URI name, boolean dtEnabled) throws IOException { * @param parameters parameter object * @throws IOException on any IO problem */ - private synchronized void createS3AsyncClient(S3ClientFactory clientFactory, + private void createS3AsyncClient(S3ClientFactory clientFactory, S3ClientFactory.S3ClientCreationParameters parameters) throws IOException { s3AsyncClient = clientFactory.createS3AsyncClient(getUri(), parameters); } @@ -1207,7 +1207,7 @@ public RequestFactory getRequestFactory() { * Get the S3 Async client; synchronized to keep spotbugs quiet. * @return the async s3 client. */ - private synchronized S3AsyncClient getS3AsyncClient() { + private S3AsyncClient getS3AsyncClient() { return s3AsyncClient; } @@ -1393,14 +1393,9 @@ public S3AInternals getS3AInternals() { */ private final class S3AInternalsImpl implements S3AInternals { - /** - * A log for warning of aws s3 client use; only logs once per process. 
-     */
-    private final LogExactlyOnce AWS_CLIENT_LOG = new LogExactlyOnce(LOG);
-
    @Override
    public S3Client getAmazonS3V2ClientForTesting(String reason) {
-      AWS_CLIENT_LOG.warn("Access to S3 client requested, reason {}", reason);
+      LOG.debug("Access to S3 client requested, reason {}", reason);
      return s3Client;
    }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java
index 443e7d8185642..b106777dd29cc 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/CredentialProviderListFactory.java
@@ -35,6 +35,7 @@
import org.slf4j.LoggerFactory;
import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
import software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider;
+import software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider;
import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.conf.Configuration;
@@ -100,7 +101,7 @@ public final class CredentialProviderListFactory {
  public static final String EC2_IAM_CREDENTIALS_V2 =
      IAMInstanceCredentialsProvider.class.getName();

-  /** V1 credential provider: {@value}. */
+  /** V1 env var credential provider: {@value}. */
  public static final String ENVIRONMENT_CREDENTIALS_V1 =
      "com.amazonaws.auth.EnvironmentVariableCredentialsProvider";

@@ -108,6 +109,14 @@ public final class CredentialProviderListFactory {
  public static final String ENVIRONMENT_CREDENTIALS_V2 =
      EnvironmentVariableCredentialsProvider.class.getName();

+  /** V1 profile credential provider: {@value}. */
+  public static final String PROFILE_CREDENTIALS_V1 =
+      "com.amazonaws.auth.profile.ProfileCredentialsProvider";
+
+  /** V2 profile credential provider. */
+  public static final String PROFILE_CREDENTIALS_V2 =
+      ProfileCredentialsProvider.class.getName();
+
  /**
   * Private map of v1 to v2 credential provider name mapping.
   */
@@ -176,7 +185,10 @@ private static Map initCredentialProvidersMap() {
        EC2_IAM_CREDENTIALS_V2);
    v1v2CredentialProviderMap.put(EC2_IAM_CREDENTIALS_V1,
        EC2_IAM_CREDENTIALS_V2);
-    v1v2CredentialProviderMap.put(ENVIRONMENT_CREDENTIALS_V1, ENVIRONMENT_CREDENTIALS_V2);
+    v1v2CredentialProviderMap.put(ENVIRONMENT_CREDENTIALS_V1,
+        ENVIRONMENT_CREDENTIALS_V2);
+    v1v2CredentialProviderMap.put(PROFILE_CREDENTIALS_V1,
+        PROFILE_CREDENTIALS_V2);

    return v1v2CredentialProviderMap;
  }
diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java
index a28a62fd8ebba..2e39b275b4a4d 100644
--- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java
+++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/auth/IAMInstanceCredentialsProvider.java
@@ -32,7 +32,7 @@
/**
 * This is an IAM credential provider which wraps
- * an {@code EC2ContainerCredentialsProviderWrapper}
+ * a {@code ContainerCredentialsProvider}
 * to provide credentials when the S3A connector is instantiated on AWS EC2
 * or the AWS container services.
 *

diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md index 094ea5668c05e..ea53b2e1fa9e3 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/assumed_roles.md @@ -195,7 +195,7 @@ Here are the full set of configuration options. fs.s3a.assumed.role.credentials.provider org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - com.amazonaws.auth.EnvironmentVariableCredentialsProvider + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider List of credential providers to authenticate with the STS endpoint and diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index caa86d657dcd2..6a82ec85965e1 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -36,12 +36,7 @@ The [SDK V2](https://github.com/aws/aws-sdk-java-v2) for S3 is very different fr A complete list of the changes can be found in the [Changelog](https://github.com/aws/aws-sdk-java-v2/blob/master/docs/LaunchChangelog.md#41-s3-changes). -# S3A integration changes. - -## Deployment Changes - - -### Packaging: `aws-java-sdk-bundle-1.12.x.jar` becomes `bundle-2.x.y.jar` +## Packaging: `aws-java-sdk-bundle-1.12.x.jar` becomes `bundle-2.x.y.jar` As the module name is lost, in hadoop releases a large JAR file with the name "bundle" is now part of the distribution. @@ -58,17 +53,116 @@ As before: the exact set of dependencies used by the S3A connector is neither defined nor comes with any commitments of stability or compatibility of dependent libraries. -### Configuration Option Changes -### Credential Providers declared in `fs.s3a.aws.credentials.provider` -V1 Credential providers are *only* supported when the V1 SDK is on the classpath. + +## Credential Provider changes and migration + +- Interface change: [com.amazonaws.auth.AWSCredentialsProvider](https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-core/src/main/java/com/amazonaws/auth/AWSCredentialsProvider.java) +has been replaced by [software.amazon.awssdk.auth.credentials.AwsCredentialsProvider](https://github.com/aws/aws-sdk-java-v2/blob/master/core/auth/src/main/java/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.java). +- Credential provider class changes: the package and class names of credential providers have +changed. + +The change in interface means that custom credential providers will need to be updated to +implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` instead of +`com.amazonaws.auth.AWSCredentialsProvider`. + +### Original V1 `AWSCredentialsProvider` interface + +Note how the interface begins with the capitalized "AWS" acronym. +The V2 interface starts with "Aws". This is a very subtle change +for developers to spot. +Compilers _will_ detect and report the type mismatch. + + +```java +package com.amazonaws.auth; + +public interface AWSCredentialsProvider { + + public AWSCredentials getCredentials(); + + public void refresh(); + +} + +``` +The interface binding also supported a factory method, `AWSCredentialsProvider instance()`, which, +if available, would be invoked in preference to using any constructor.
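+
+For illustration only, a hypothetical custom V1 provider using that
+factory-method binding might have looked like the sketch below; the
+package, class name and hard-coded secrets are invented for this example:
+
+```java
+package org.example.auth;
+
+import com.amazonaws.auth.AWSCredentials;
+import com.amazonaws.auth.AWSCredentialsProvider;
+import com.amazonaws.auth.BasicAWSCredentials;
+
+public class ExampleV1Provider implements AWSCredentialsProvider {
+
+  // invoked by the binding in preference to any constructor
+  public static AWSCredentialsProvider instance() {
+    return new ExampleV1Provider();
+  }
+
+  @Override
+  public AWSCredentials getCredentials() {
+    // a real provider would look up or fetch its secrets here
+    return new BasicAWSCredentials("access-key", "secret-key");
+  }
+
+  @Override
+  public void refresh() {
+    // no-op: nothing is cached
+  }
+}
+```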
+ +If the interface implemented `Closeable` or `AutoCloseable`, these would +be invoked when the provider chain was being shut down. + +### New V2 `AwsCredentialsProvider` interface + +```java +package software.amazon.awssdk.auth.credentials; + +public interface AwsCredentialsProvider { + + AwsCredentials resolveCredentials(); + +} +``` + +1. There is no `refresh()` method any more. +2. `getCredentials()` has become `resolveCredentials()`. +3. There is now the expectation in the SDK that credential resolution/lookup etc. will be + performed in `resolveCredentials()`. +4. If the interface implements `Closeable` or `AutoCloseable`, these will + be invoked when the provider chain is being shut down. +5. A static factory method `create()` returning an `AwsCredentialsProvider` or subclass will be used + in preference to a constructor. + +### S3A `AWSCredentialProviderList` is now a V2 credential provider + +The class `org.apache.hadoop.fs.s3a.AWSCredentialProviderList` has moved from +being a V1 to a V2 credential provider; even if an instance can be created with +existing code, the V1 methods will not resolve: + +``` +java.lang.NoSuchMethodError: org.apache.hadoop.fs.s3a.AWSCredentialProviderList.getCredentials()Lcom/amazonaws/auth/AWSCredentials; + at org.apache.hadoop.fs.store.diag.S3ADiagnosticsInfo.validateFilesystem(S3ADiagnosticsInfo.java:903) +``` + +### Migration of Credential Providers listed in `fs.s3a.aws.credentials.provider` + + +Previously, `fs.s3a.aws.credentials.provider` took a list of V1 credential providers, +containing any of: +1. V1 credential providers implemented in the `hadoop-aws` module. +2. V1 credential providers implemented in the `aws-sdk-bundle` library. +3. Custom V1 credential providers placed onto the classpath. +4. Custom subclasses of hadoop-aws credential providers. + +Here is how each of those categories changes: +1. All `hadoop-aws` credential providers migrated to V2. +2. Well-known `aws-sdk-bundle` credential providers _automatically remapped_ to their V2 equivalents. +3. Custom V1 providers are supported if the original `aws-sdk-bundle` JAR is on the classpath. +4. Custom subclasses of hadoop-aws credential providers need manual migration. + +Because of (1) and (2), standard `fs.s3a.aws.credentials.provider` configurations +should upgrade seamlessly. This also means that the same provider list, if restricted to +those classes, will work across versions. + + + +### `hadoop-aws` credential providers migration to V2 + +All the `fs.s3a` credential providers have the same names and functionality as before. + +| Hadoop module credential provider | Authentication Mechanism | |----------------------------------------------------------------|--------------------------------------------------| | `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider` | Session Credentials in configuration | | `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` | Simple name/secret credentials in configuration | | `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` | Anonymous Login | | `org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider` | [Assumed Role credentials](./assumed_roles.html) | | `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | EC2/k8s instance credentials | + +### Automatic `aws-sdk-bundle` credential provider remapping + +The commonly-used set of V1 credential providers is automatically remapped to V2 equivalents, as the following table shows.
+ - -The standard set of V1 credential providers used in hadoop deployments are -automatically remapped to V2 equivalents, -while the stable hadoop providers have been upgraded in place; their names -are unchanged. -As result, standard cluster configurations should seamlessly upgrade. | V1 Credential Provider | Remapped V2 substitute | |-------------------------------------------------------------|----------------------------------------------------------------------------------| @@ -76,11 +170,11 @@ As result, standard cluster configurations should seamlessly upgrade. | `com.amazonaws.auth.EnvironmentVariableCredentialsProvider` | `software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` | | `com.amazonaws.auth.EC2ContainerCredentialsProviderWrapper` | `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | | `com.amazonaws.auth.InstanceProfileCredentialsProvider` | `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | - +| `com.amazonaws.auth.profile.ProfileCredentialsProvider` | `software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider` | There are still a number of trouble spots here: -#### Other `com.amazonaws.auth.` AWS providers +#### Less widely used `com.amazonaws.auth.` AWS providers There should be equivalents in the new SDK, but as well as being renamed they are likely to have moved to different factory/builder mechanisms. @@ -98,12 +192,12 @@ it should still be possible to use the existing classes. Adding a V2 equivalent is the recommended long-term solution. -#### Private subclasses of the Hadoop credential providers +#### Custom subclasses of the Hadoop credential providers Because all the standard hadoop credential providers have been upgraded, any subclasses of these are not going to link or work. -These will need to be upgraded in source, as covered below. +These will need to be manually migrated to V2 credential providers. ## Source code/binary integration changes @@ -172,28 +266,28 @@ The `S3ClientFactory` interface has been replaced by one that creates a V2 `S3Cl #### `S3AFileSystem` method changes: `S3AInternals`. The low-level s3 operations/client accessors have been moved into a new interface, -`org.apache.hadoop.fs.s3a.S3AInternals`, which must be accessed via the +`org.apache.hadoop.fs.s3a.S3AInternals`, which must be accessed via the `S3AFileSystem.getS3AInternals()` method. -They have also been updated to return V2 SDK classes. +They have also been updated to return V2 SDK classes. ```java @InterfaceStability.Unstable @InterfaceAudience.LimitedPrivate("testing/diagnostics") public interface S3AInternals { S3Client getAmazonS3V2ClientForTesting(String reason); - + @Retries.RetryTranslated @AuditEntryPoint String getBucketLocation() throws IOException; - + @AuditEntryPoint @Retries.RetryTranslated String getBucketLocation(String bucketName) throws IOException; - + @AuditEntryPoint @Retries.RetryTranslated HeadObjectResponse getObjectMetadata(Path path) throws IOException; - + AWSCredentialProviderList shareCredentials(final String purpose); } ``` @@ -210,7 +304,8 @@ cannot find symbol [ERROR] location: variable fs of type org.apache.hadoop.fs.s3a.S3AFileSystem ``` -It has been replaced by an `S3AInternals` equivalent which returns the V2 `S3Client` of the filesystem instance. +It has been replaced by an `S3AInternals` equivalent which returns the V2 `S3Client` +of the filesystem instance.
```java ((S3AFilesystem)fs).getAmazonS3ClientForTesting("testing") @@ -220,22 +315,23 @@ It has been replaced by an `S3AInternals` equivalent which returns the V2 `S3Cli ((S3AFilesystem)fs).getS3AInternals().getAmazonS3ClientForTesting("testing") ``` -##### `S3AFileSystem.getObjectMetadata(Path path)` moved and return type changed +##### `S3AFileSystem.getObjectMetadata(Path path)` moved to `S3AInternals`; return type changed -The `getObjectMetadata(Path)` call returns an instance of the -`software.amazon.awssdk.services.s3.model.HeadObjectResponse` class +The `getObjectMetadata(Path)` call has been moved to the `S3AInternals` interface +and now returns an instance of the `software.amazon.awssdk.services.s3.model.HeadObjectResponse` class. +The original `S3AFileSystem` method has been deleted. +Before: ```java ((S3AFilesystem)fs).getObjectMetadata(path) ``` +After: ```java ((S3AFilesystem)fs).getS3AInternals().getObjectMetadata(path) ``` -The original `S3AFileSystem` method has been retained (and forwards to the new interface's -implementation), however its return type has changed and is marked as deprecated. - ##### `AWSCredentialProviderList shareCredentials(String)` moved to `S3AInternals` The operation to share a reference-counted access to the AWS credentials used @@ -244,73 +340,6 @@ by the S3A FS has been moved to `S3AInternals`. This is very much an implementation method, used to allow extension modules to share an authentication chain into other AWS SDK client services (dynamoDB, etc.). -### Credential Providers - -- Interface change: [com.amazonaws.auth.AWSCredentialsProvider](https://github.com/aws/aws-sdk-java/blob/master/aws-java-sdk-core/src/main/java/com/amazonaws/auth/AWSCredentialsProvider.java) -has been replaced by [software.amazon.awssdk.auth.credentials.AwsCredentialsProvider](https://github.com/aws/aws-sdk-java-v2/blob/master/core/auth/src/main/java/software/amazon/awssdk/auth/credentials/AwsCredentialsProvider.java). -- Credential provider class changes: the package and class names of credential providers have -changed. - -The change in interface will mean that custom credential providers will need to be updated to now -implement `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` instead of -`com.amazonaws.auth.AWSCredentialsProvider`. - -#### Original V1 `AWSCredentialsProvider` interface - -Note how the interface begins with the capitalized "AWS" acronym. -The V2 interface starts with "Aws". This is a very subtle change -for developers to spot. -Compilers _will_ detect and report the type mismatch. - - -```java -package com.amazonaws.auth; - -public interface AWSCredentialsProvider { - - public AWSCredentials getCredentials(); - - public void refresh(); - -} - -``` -The interface binding also supported a factory method, `AWSCredentialsProvider instance()` which, -if available, would be invoked in preference to using any constructor. - -If the interface implemented `Closeable` or `AutoCloseable`, these would -be invoked when the provider chain was being shut down. - -#### V2 `AwsCredentialsProvider` interface - -```java -package software.amazon.awssdk.auth.credentials; - -public interface AwsCredentialsProvider { - - AwsCredentials resolveCredentials(); - -} -``` - -1. There is no `refresh()` method any more. -2. `getCredentials()` has become `resolveCredentials()`. -3. There is now the expectation in the SDK that credential resolution/lookup etc will be - performed in `resolveCredentials()`. -4.
If the interface implements `Closeable` or `AutoCloseable`, these will - be invoked when the provider chain is being shut down. - -#### `AWSCredentialProviderList` is now a V2 credential provider - -The class `org.apache.hadoop.fs.s3a.AWSCredentialProviderList` has moved from -being a V1 to a V2 credential provider; even if an instance can be created with -existing code, the V1 methods will not resolve: - -``` -java.lang.NoSuchMethodError: org.apache.hadoop.fs.s3a.AWSCredentialProviderList.getCredentials()Lcom/amazonaws/auth/AWSCredentials; - at org.apache.hadoop.fs.store.diag.S3ADiagnosticsInfo.validateFilesystem(S3ADiagnosticsInfo.java:903) -``` - ### Delegation Tokens 1. Custom credential providers used in delegation token binding classes will need to be updated diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md index 91f08bb730a50..43927723e365d 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/delegation_tokens.md @@ -338,7 +338,7 @@ Here is the effective list of providers if none are declared: org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - com.amazonaws.auth.EnvironmentVariableCredentialsProvider, + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider, org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index f0f6e926321eb..f85863abdddcb 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -249,56 +249,39 @@ a warning has been printed since Hadoop 2.8 whenever such a URL was used. ```xml fs.s3a.access.key - AWS access key ID. - Omit for IAM role-based or provider-based authentication. + AWS access key ID used by S3A file system. Omit for IAM role-based or provider-based authentication. fs.s3a.secret.key - AWS secret key. - Omit for IAM role-based or provider-based authentication. + AWS secret key used by S3A file system. Omit for IAM role-based or provider-based authentication. - fs.s3a.aws.credentials.provider - - Comma-separated class names of credential provider classes which implement - com.amazonaws.auth.AWSCredentialsProvider. - - These are loaded and queried in sequence for a valid set of credentials. - Each listed class must implement one of the following means of - construction, which are attempted in order: - 1. a public constructor accepting java.net.URI and - org.apache.hadoop.conf.Configuration, - 2. a public static method named getInstance that accepts no - arguments and returns an instance of - com.amazonaws.auth.AWSCredentialsProvider, or - 3. a public default constructor. - - Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows - anonymous access to a publicly accessible S3 bucket without any credentials. - Please note that allowing anonymous access to an S3 bucket compromises - security and therefore is unsuitable for most use cases. It can be useful - for accessing public data sets without requiring AWS credentials. - - If unspecified, then the default list of credential provider classes, - queried in sequence, is: - 1. 
org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider: - Uses the values of fs.s3a.access.key and fs.s3a.secret.key. - 2. com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports - configuration of AWS access key ID and secret access key in - environment variables named AWS_ACCESS_KEY_ID and - AWS_SECRET_ACCESS_KEY, as documented in the AWS SDK. - 3. com.amazonaws.auth.InstanceProfileCredentialsProvider: supports use - of instance profile credentials if running in an EC2 VM. + fs.s3a.session.token + Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider + as one of the providers. - + - fs.s3a.session.token + fs.s3a.aws.credentials.provider + + org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, + org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider, + org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider + - Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider - as one of the providers. + Comma-separated class names of credential provider classes which implement + software.amazon.awssdk.auth.credentials.AwsCredentialsProvider. + + When S3A delegation tokens are not enabled, this list will be used + to directly authenticate with S3 and other AWS services. + When S3A Delegation tokens are enabled, depending upon the delegation + token binding it may be used + to communicate with the STS endpoint to request session/role + credentials. ``` @@ -350,13 +333,19 @@ credentials if they are defined. 1. The [AWS environment variables](http://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html#cli-environment), are then looked for: these will return session or full credentials depending on which values are set. -1. An attempt is made to query the Amazon EC2 Instance Metadata Service to +1. An attempt is made to query the Amazon EC2 Instance/k8s container Metadata Service to retrieve credentials published to EC2 VMs. S3A can be configured to obtain client authentication providers from classes -which integrate with the AWS SDK by implementing the `com.amazonaws.auth.AWSCredentialsProvider` -Interface. This is done by listing the implementation classes, in order of +which integrate with the AWS SDK by implementing the +`software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` +interface. +This is done by listing the implementation classes, in order of preference, in the configuration option `fs.s3a.aws.credentials.provider`. +In previous hadoop releases, providers were required to +implement the AWS V1 SDK interface `com.amazonaws.auth.AWSCredentialsProvider`. +Consult the [Upgrading S3A to AWS SDK V2](./aws_sdk_upgrade.html) documentation +to see how to migrate credential providers. *Important*: AWS Credential Providers are distinct from _Hadoop Credential Providers_. As will be covered later, Hadoop Credential Providers allow passwords and other secrets to be stored and transferred separately from the
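+
+To illustrate the V2 interface, a minimal custom provider might look like the
+sketch below; the package, class name and fixed secrets are invented for this
+example, and a real implementation would resolve its secrets inside
+`resolveCredentials()`:
+
+```java
+package org.example.auth;
+
+import software.amazon.awssdk.auth.credentials.AwsBasicCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentials;
+import software.amazon.awssdk.auth.credentials.AwsCredentialsProvider;
+
+public final class ExampleV2Provider implements AwsCredentialsProvider {
+
+  @Override
+  public AwsCredentials resolveCredentials() {
+    // all credential lookup/resolution work belongs in this method
+    return AwsBasicCredentials.create("access-key", "secret-key");
+  }
+}
+```
+
+Such a class would then be listed in `fs.s3a.aws.credentials.provider`
+alongside, or instead of, the standard providers.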
There are a number of AWS Credential Providers inside the `hadoop-aws` JAR: -| classname | description | -|-----------|-------------| -| `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider`| Session Credentials | -| `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider`| Simple name/secret credentials | -| `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider`| Anonymous Login | -| `org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider<`| [Assumed Role credentials](assumed_roles.html) | +| Hadoop module credential provider | Authentication Mechanism | +|----------------------------------------------------------------|--------------------------------------------------| +| `org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider` | Session Credentials in configuration | +| `org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider` | Simple name/secret credentials in configuration | +| `org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider` | Anonymous Login | +| `org.apache.hadoop.fs.s3a.auth.AssumedRoleCredentialProvider` | [Assumed Role credentials](./assumed_roles.html) | +| `org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider` | EC2/k8s instance credentials | -There are also many in the Amazon SDKs, in particular two which are automatically -set up in the authentication chain: +There are also many in the Amazon SDKs, with the common ones being: | classname | description | |-----------|-------------| -| `com.amazonaws.auth.InstanceProfileCredentialsProvider`| EC2 Metadata Credentials | -| `com.amazonaws.auth.EnvironmentVariableCredentialsProvider`| AWS Environment Variables | +| `software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider` | AWS Environment Variables | +| `software.amazon.awssdk.auth.credentials.InstanceProfileCredentialsProvider` | EC2 Metadata Credentials | +| `software.amazon.awssdk.auth.credentials.ContainerCredentialsProvider` | EC2/k8s Metadata Credentials | + ### EC2 IAM Metadata Authentication with `InstanceProfileCredentialsProvider` @@ -402,7 +393,7 @@ You can configure Hadoop to authenticate to AWS using a [named profile](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html). To authenticate with a named profile: -1. Declare `com.amazonaws.auth.profile.ProfileCredentialsProvider` as the provider. +1. Declare `software.amazon.awssdk.auth.credentials.ProfileCredentialsProvider` as the provider. 1. Set your profile via the `AWS_PROFILE` environment variable. 1. Due to a [bug in version 1 of the AWS Java SDK](https://github.com/aws/aws-sdk-java/issues/803), you'll need to remove the `profile` prefix from the AWS configuration section heading. @@ -525,50 +516,9 @@ This means that the default S3A authentication chain can be defined as org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider, org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider, - com.amazonaws.auth.EnvironmentVariableCredentialsProvider, + software.amazon.awssdk.auth.credentials.EnvironmentVariableCredentialsProvider, org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider - - Comma-separated class names of credential provider classes which implement - com.amazonaws.auth.AWSCredentialsProvider. - - When S3A delegation tokens are not enabled, this list will be used - to directly authenticate with S3 and other AWS services. - When S3A Delegation tokens are enabled, depending upon the delegation - token binding it may be used - to communicate with the STS endpoint to request session/role - credentials. - - These are loaded and queried in sequence for a valid set of credentials.
- Each listed class must implement one of the following means of - construction, which are attempted in order: - * a public constructor accepting java.net.URI and - org.apache.hadoop.conf.Configuration, - * a public constructor accepting org.apache.hadoop.conf.Configuration, - * a public static method named getInstance that accepts no - arguments and returns an instance of - com.amazonaws.auth.AWSCredentialsProvider, or - * a public default constructor. - - Specifying org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider allows - anonymous access to a publicly accessible S3 bucket without any credentials. - Please note that allowing anonymous access to an S3 bucket compromises - security and therefore is unsuitable for most use cases. It can be useful - for accessing public data sets without requiring AWS credentials. - - If unspecified, then the default list of credential provider classes, - queried in sequence, is: - * org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider: looks - for session login secrets in the Hadoop configuration. - * org.apache.hadoop.fs.s3a.SimpleAWSCredentialsProvider: - Uses the values of fs.s3a.access.key and fs.s3a.secret.key. - * com.amazonaws.auth.EnvironmentVariableCredentialsProvider: supports - configuration of AWS access key ID and secret access key in - environment variables named AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, - and AWS_SESSION_TOKEN as documented in the AWS SDK. - * org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider: picks up - IAM credentials of any EC2 VM or AWS container in which the process is running. - ``` @@ -1415,7 +1365,7 @@ role information available when deployed in Amazon EC2. ```xml fs.s3a.aws.credentials.provider - com.amazonaws.auth.InstanceProfileCredentialsProvider + org.apache.hadoop.fs.s3a.auth.IAMInstanceCredentialsProvider ``` @@ -2137,7 +2087,7 @@ If no custom signers are being used - this value does not need to be set. `SignerName:SignerClassName` - register a new signer with the specified name, and the class for this signer. -The Signer Class must implement `com.amazonaws.auth.Signer`. +The Signer Class must implement `software.amazon.awssdk.core.signer.Signer`. `SignerName:SignerClassName:SignerInitializerClassName` - similar to above except also allows for a custom SignerInitializer diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md index ebb21f104d046..e2550ba0e25d4 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/troubleshooting_s3a.md @@ -2023,51 +2023,3 @@ com.amazonaws.SdkClientException: Unable to execute HTTP request: When this happens, try to set `fs.s3a.connection.request.timeout` to a larger value or disable it completely by setting it to `0`. - -## SDK Upgrade Warnings - -S3A will soon be upgraded to [AWS's Java SDK V2](https://github.com/aws/aws-sdk-java-v2). -For more information on the upgrade and what's changing, see -[Upcoming upgrade to AWS Java SDK V2](./aws_sdk_upgrade.html). - -S3A logs the following warnings for things that will be changing in the upgrade. To disable these -logs, comment out `log4j.logger.org.apache.hadoop.fs.s3a.SDKV2Upgrade` in log4j.properties. - -### `Directly referencing AWS SDK V1 credential provider` - -This will be logged when an AWS credential provider is referenced directly in -`fs.s3a.aws.credentials.provider`.
-For example, `com.amazonaws.auth.AWSSessionCredentialsProvider` - -To stop this warning, remove any AWS credential providers from `fs.s3a.aws.credentials.provider`. -Instead, use S3A's credential providers. - -### `getAmazonS3ClientForTesting() will be removed` - -This will be logged when `getAmazonS3ClientForTesting()` is called to get the S3 Client. With V2, -the S3 client will change from type `com.amazonaws.services.s3.AmazonS3` to -`software.amazon.awssdk.services.s3.S3Client`, and so this method will be removed. - -### -### `Custom credential providers used in delegation tokens binding classes will need to be updated` - -This will be logged when delegation tokens are used. -Delegation tokens allow the use of custom binding classes which can implement custom credential -providers. -These credential providers will currently be implementing -`com.amazonaws.auth.AWSCredentialsProvider` and will need to be updated to implement -`software.amazon.awssdk.auth.credentials.AwsCredentialsProvider`. - -### -### `The signer interface has changed in AWS SDK V2, custom signers will need to be updated` - -This will be logged when a custom signer is used. -Custom signers will currently be implementing `com.amazonaws.auth.Signer` and will need to be -updated to implement `software.amazon.awssdk.core.signer.Signer`. - -### -### `getObjectMetadata() called. This operation and it's response will be changed` - -This will be logged when `getObjectMetadata` is called. In SDK V2, this operation has changed to -`headObject()` and will return a response of the type `HeadObjectResponse`. - diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index f47e498edf9bb..f6745a0bad01b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -95,7 +95,7 @@ public class ITestS3AConfiguration { * @param reason why? * @return the client */ - private final S3Client getS3Client(String reason) { + private S3Client getS3Client(String reason) { return requireNonNull(getS3AInternals().getAmazonS3V2ClientForTesting(reason)); } From 34beeeae7bd81f00f9bb740418dd2334dd6d3488 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 15 Aug 2023 17:18:55 +0100 Subject: [PATCH 15/20] HADOOP-18820. checkstyle and minor code cleanups biggest change is that S3AInternals exports getBucketMetadata(); this was used in a test, and its output is indirectly available via getXattrs(/).
Change-Id: Iaf45ccbdd778be474e6ece07275ba179c215cf83 --- .../apache/hadoop/fs/s3a/S3AFileSystem.java | 14 +++++++- .../apache/hadoop/fs/s3a/S3AInternals.java | 11 ++++++ .../fs/s3a/SimpleAWSCredentialsProvider.java | 6 ++-- .../hadoop/fs/s3a/api/RequestFactory.java | 6 +++- .../s3a/ITestS3AAWSCredentialsProvider.java | 4 +-- .../fs/s3a/ITestS3ABucketExistence.java | 4 +-- .../hadoop/fs/s3a/ITestS3AConfiguration.java | 35 +++++++++---------- .../hadoop/fs/s3a/ITestS3AEndpointRegion.java | 11 +----- .../hadoop/fs/s3a/ITestS3AMiscOperations.java | 2 +- .../hadoop/fs/s3a/S3ATestConstants.java | 5 +++ 10 files changed, 59 insertions(+), 39 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java index cc9c173689fe4..305695f2ef5ac 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AFileSystem.java @@ -1444,6 +1444,17 @@ public HeadObjectResponse getObjectMetadata(Path path) throws IOException { "getObjectMetadata")); } + /** + * S3AInternals method. + * {@inheritDoc}. + */ + @Override + @AuditEntryPoint + @Retries.RetryTranslated + public HeadBucketResponse getBucketMetadata() throws IOException { + return S3AFileSystem.this.getBucketMetadata(); + } + /** * Get a shared copy of the AWS credentials, with its reference * counter updated. @@ -2821,7 +2832,8 @@ protected HeadObjectResponse getObjectMetadata(String key, * @throws UnknownStoreException the bucket is absent * @throws IOException any other problem talking to S3 */ - @Retries.RetryRaw + @AuditEntryPoint + @Retries.RetryTranslated protected HeadBucketResponse getBucketMetadata() throws IOException { final HeadBucketResponse response = trackDurationAndSpan(STORE_EXISTS_PROBE, bucket, null, () -> invoker.retry("getBucketMetadata()", bucket, true, () -> { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java index 03e950e8fc47c..989cb73fe02a5 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/S3AInternals.java @@ -22,6 +22,7 @@ import java.nio.file.AccessDeniedException; import software.amazon.awssdk.services.s3.S3Client; +import software.amazon.awssdk.services.s3.model.HeadBucketResponse; import software.amazon.awssdk.services.s3.model.HeadObjectResponse; import org.apache.hadoop.classification.InterfaceAudience; @@ -96,4 +97,14 @@ public interface S3AInternals { * @return a reference to shared credentials. */ AWSCredentialProviderList shareCredentials(String purpose); + + /** + * Request bucket metadata. 
+ * @return the metadata + * @throws UnknownStoreException the bucket is absent + * @throws IOException any other problem talking to S3 + */ + @AuditEntryPoint + @Retries.RetryTranslated + HeadBucketResponse getBucketMetadata() throws IOException; } diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java index 186698b5f1848..f7eaf825b9c94 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/SimpleAWSCredentialsProvider.java @@ -61,7 +61,7 @@ public class SimpleAWSCredentialsProvider implements AwsCredentialsProvider { */ public SimpleAWSCredentialsProvider(final URI uri, final Configuration conf) throws IOException { - this(getAWSAccessKeys(uri, conf)); + this(getAWSAccessKeys(uri, conf)); } /** @@ -73,8 +73,8 @@ public SimpleAWSCredentialsProvider(final URI uri, final Configuration conf) @VisibleForTesting SimpleAWSCredentialsProvider(final S3xLoginHelper.Login login) throws IOException { - this.accessKey = login.getUser(); - this.secretKey = login.getPassword(); + this.accessKey = login.getUser(); + this.secretKey = login.getPassword(); } @Override diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java index c7984aa4e2605..99a898f728166 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/api/RequestFactory.java @@ -41,13 +41,15 @@ import software.amazon.awssdk.services.s3.model.StorageClass; import software.amazon.awssdk.services.s3.model.UploadPartRequest; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.fs.PathIOException; import org.apache.hadoop.fs.s3a.S3AEncryptionMethods; import org.apache.hadoop.fs.s3a.auth.delegation.EncryptionSecrets; import org.apache.hadoop.fs.s3a.impl.PutObjectOptions; /** - * Factory for S3 objects. + * Factory for S3 request objects. * * This is where the owner FS's {@code prepareRequest()} * callback is invoked to mark up a request for this span. @@ -61,6 +63,8 @@ * as there are no guarantees how they are processed. * That is: no guarantees of retry or translation. 
*/ +@InterfaceStability.Unstable +@InterfaceAudience.LimitedPrivate("testing/diagnostics") public interface RequestFactory { /** diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 690607a530f7a..8f854ddb6e2ad 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -80,7 +80,7 @@ static class BadCredentialsProviderConstructor implements AwsCredentialsProvider { @SuppressWarnings("unused") - public BadCredentialsProviderConstructor(String fsUri, Configuration conf) { + BadCredentialsProviderConstructor(String fsUri, Configuration conf) { } @Override @@ -125,7 +125,7 @@ private void createFailingFS(Configuration conf) throws IOException { static class BadCredentialsProvider implements AwsCredentialsProvider { @SuppressWarnings("unused") - public BadCredentialsProvider(Configuration conf) { + BadCredentialsProvider(Configuration conf) { } @Override diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java index 66808d52a5040..38c4685eb137e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3ABucketExistence.java @@ -131,7 +131,7 @@ private Configuration createConfigurationWithProbe(final int probe) { ENDPOINT, AWS_REGION); conf.setInt(S3A_BUCKET_PROBE, probe); - conf.set(AWS_REGION, "eu-west-1"); + conf.set(AWS_REGION, EU_WEST_1); return conf; } @@ -210,7 +210,7 @@ public void testAccessPointRequired() throws Exception { */ private Configuration createArnConfiguration() { Configuration configuration = createConfigurationWithProbe(2); - configuration.set(AWS_REGION, "eu-west-1"); + configuration.set(AWS_REGION, EU_WEST_1); return configuration; } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java index f6745a0bad01b..d970d8a3ca7d0 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AConfiguration.java @@ -19,8 +19,8 @@ package org.apache.hadoop.fs.s3a; import java.io.File; -import java.io.IOException; import java.net.URI; +import java.nio.file.AccessDeniedException; import java.security.PrivilegedExceptionAction; import org.assertj.core.api.Assertions; @@ -38,7 +38,6 @@ import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.S3Configuration; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; -import software.amazon.awssdk.services.s3.model.S3Exception; import software.amazon.awssdk.services.sts.StsClient; import software.amazon.awssdk.services.sts.model.StsException; @@ -60,6 +59,7 @@ import static java.util.Objects.requireNonNull; import static org.apache.hadoop.fs.s3a.Constants.*; +import static org.apache.hadoop.fs.s3a.S3ATestConstants.EU_WEST_1; import static org.apache.hadoop.fs.s3a.S3AUtils.*; import static org.apache.hadoop.fs.s3a.S3ATestUtils.*; import static 
org.apache.hadoop.test.LambdaTestUtils.intercept; @@ -134,7 +134,6 @@ public void testEndpoint() throws Exception { } else { conf.set(Constants.ENDPOINT, endpoint); fs = S3ATestUtils.createTestFileSystem(conf); - S3Client s3 = getS3Client("test endpoint"); String endPointRegion = ""; // Differentiate handling of "s3-" and "s3." based endpoint identifiers String[] endpointParts = StringUtils.split(endpoint, '.'); @@ -173,7 +172,7 @@ protected void useFailFastConfiguration() { } /** - * Expect a filesystem to not be created from a configuration + * Expect a filesystem to not be created from a configuration. * @return the exception intercepted * @throws Exception any other exception */ @@ -540,36 +539,34 @@ public void testConfOptionPropagationToFS() throws Exception { } @Test(timeout = 10_000L) - public void testS3SpecificSignerOverride() throws IOException { + public void testS3SpecificSignerOverride() throws Exception { Configuration config = new Configuration(); + removeBaseAndBucketOverrides(config, + CUSTOM_SIGNERS, SIGNING_ALGORITHM_S3, SIGNING_ALGORITHM_STS, AWS_REGION); config.set(CUSTOM_SIGNERS, - "CustomS3Signer:" + CustomS3Signer.class.getName() + ",CustomSTSSigner:" - + CustomSTSSigner.class.getName()); + "CustomS3Signer:" + CustomS3Signer.class.getName() + + ",CustomSTSSigner:" + CustomSTSSigner.class.getName()); config.set(SIGNING_ALGORITHM_S3, "CustomS3Signer"); config.set(SIGNING_ALGORITHM_STS, "CustomSTSSigner"); - config.set(AWS_REGION, "eu-west-1"); + config.set(AWS_REGION, EU_WEST_1); fs = S3ATestUtils.createTestFileSystem(config); S3Client s3Client = getS3Client("testS3SpecificSignerOverride"); + final String bucket = fs.getBucket(); StsClient stsClient = - STSClientFactory.builder(config, fs.getBucket(), new AnonymousAWSCredentialsProvider(), "", + STSClientFactory.builder(config, bucket, new AnonymousAWSCredentialsProvider(), "", "").build(); - try { - stsClient.getSessionToken(); - } catch (StsException exception) { - // Expected 403, as credentials are not provided. - } + intercept(StsException.class, "", () -> + stsClient.getSessionToken()); - try { - s3Client.headBucket(HeadBucketRequest.builder().bucket(fs.getBucket()).build()); - } catch (S3Exception exception) { - // Expected 403, as credentials are not provided. 
- } + intercept(AccessDeniedException.class, "", () -> + Invoker.once("head", bucket, () -> + s3Client.headBucket(HeadBucketRequest.builder().bucket(bucket).build()))); Assertions.assertThat(CustomS3Signer.isS3SignerCalled()) .describedAs("Custom S3 signer not called").isTrue(); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java index c957ab7b6a438..e5e109ad91b50 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AEndpointRegion.java @@ -25,7 +25,6 @@ import java.util.List; import org.assertj.core.api.Assertions; -import org.junit.Assert; import org.junit.Test; import software.amazon.awssdk.awscore.AwsExecutionAttribute; import software.amazon.awssdk.awscore.exception.AwsServiceException; @@ -34,14 +33,12 @@ import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; import software.amazon.awssdk.services.s3.S3Client; import software.amazon.awssdk.services.s3.model.HeadBucketRequest; -import software.amazon.awssdk.services.s3.model.S3Exception; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.s3a.statistics.impl.EmptyS3AStatisticsContext; import static org.apache.hadoop.fs.s3a.Constants.AWS_REGION; import static org.apache.hadoop.fs.s3a.Statistic.STORE_REGION_PROBE; -import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SC_301_MOVED_PERMANENTLY; import static org.apache.hadoop.test.LambdaTestUtils.intercept; /** @@ -68,13 +65,7 @@ public void testWithoutRegionConfig() throws IOException { S3AFileSystem fs = new S3AFileSystem(); fs.initialize(getFileSystem().getUri(), conf); - try { - fs.getBucketMetadata(); - } catch (S3Exception exception) { - if (exception.statusCode() == SC_301_MOVED_PERMANENTLY) { - Assert.fail(exception.toString()); - } - } + fs.getS3AInternals().getBucketMetadata(); Assertions.assertThat(fs.getInstrumentation().getCounterValue(STORE_REGION_PROBE)) .describedAs("Region is not configured, region probe should have been made").isEqualTo(1); diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java index 285e6b62bc563..ee92bc1706eb8 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AMiscOperations.java @@ -412,7 +412,7 @@ private static T verifyNoTrailingSlash(String role, T o) { private GetBucketEncryptionResponse getDefaultEncryption() throws IOException { S3AFileSystem fs = getFileSystem(); S3Client s3 = getS3AInternals().getAmazonS3V2ClientForTesting("check default encryption"); - try { + try (AuditSpan s = span()){ return Invoker.once("getBucketEncryption()", fs.getBucket(), () -> s3.getBucketEncryption(GetBucketEncryptionRequest.builder() diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java index a6269c437665a..246d111d14b8d 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/S3ATestConstants.java @@ -251,4 +251,9 @@ public interface S3ATestConstants 
{ * Value: {@value}. */ String PROJECT_BUILD_DIRECTORY_PROPERTY = "project.build.directory"; + + /** + * AWS Ireland region. + */ + String EU_WEST_1 = "eu-west-1"; } From f6eeb29f6a8d806d1033a19d1ec2ec4ebe8363be Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Tue, 15 Aug 2023 19:39:20 +0100 Subject: [PATCH 16/20] HADOOP-18820. ITestS3AAWSCredentialsProvider enhancements * use intercept() and InstantiationIOException where appropriate. * change method names to distinguish them better. * add test to verify that the static create() method is called. Change-Id: I88e6c4eed6153861b6bd83300e6c484eb4dda08e --- .../s3a/ITestS3AAWSCredentialsProvider.java | 143 ++++++++++++------ 1 file changed, 98 insertions(+), 45 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 8f854ddb6e2ad..1876a169d48fc 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -27,8 +27,8 @@ import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.s3a.impl.InstantiationIOException; -import org.apache.hadoop.test.GenericTestUtils; +import org.assertj.core.api.Assertions; import org.junit.Rule; import org.junit.Test; import org.junit.rules.Timeout; @@ -43,10 +43,13 @@ import static org.apache.hadoop.fs.s3a.S3ATestUtils.getCSVTestPath; import static org.apache.hadoop.fs.s3a.S3ATestUtils.removeBaseAndBucketOverrides; import static org.apache.hadoop.fs.s3a.auth.delegation.DelegationConstants.DELEGATION_TOKEN_BINDING; +import static org.apache.hadoop.fs.s3a.impl.InstantiationIOException.CONSTRUCTOR_EXCEPTION; +import static org.apache.hadoop.test.LambdaTestUtils.intercept; import static org.junit.Assert.*; /** - * Integration tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic. + * Integration tests for {@link Constants#AWS_CREDENTIALS_PROVIDER} logic + * through the S3A Filesystem instantiation process. */ public class ITestS3AAWSCredentialsProvider { private static final Logger LOG = @@ -55,17 +58,21 @@ public class ITestS3AAWSCredentialsProvider { @Rule public Timeout testTimeout = new Timeout(60_1000, TimeUnit.MILLISECONDS); + /** + * Expecting a wrapped ClassNotFoundException. + */ @Test - public void testBadConfiguration() throws IOException { - Configuration conf = createConf(); - conf.set(AWS_CREDENTIALS_PROVIDER, "no.such.class"); - try { - createFailingFS(conf); - } catch (IOException e) { - if (!(e.getCause() instanceof ClassNotFoundException)) { - LOG.error("Unexpected nested cause: {} in {}", e.getCause(), e, e); - throw e; - } + public void testProviderClassNotFound() throws Exception { + Configuration conf = createConf("no.such.class"); + final InstantiationIOException e = + intercept(InstantiationIOException.class, "java.lang.ClassNotFoundException", () -> + createFailingFS(conf)); + if (InstantiationIOException.Kind.InstantiationFailure != e.getKind()) { + throw e; + } + if (!(e.getCause() instanceof ClassNotFoundException)) { + LOG.error("Unexpected nested cause: {} in {}", e.getCause(), e, e); + throw e; } }
* * This class does not provide a public constructor accepting Configuration, - or a public factory method named getInstance that accepts no arguments, + or a public factory method named create() that accepts no arguments, or a public default constructor. */ - static class BadCredentialsProviderConstructor + public static class BadCredentialsProviderConstructor implements AwsCredentialsProvider { @SuppressWarnings("unused") - BadCredentialsProviderConstructor(String fsUri, Configuration conf) { + public BadCredentialsProviderConstructor(String fsUri, Configuration conf) { } @Override @@ -92,40 +99,60 @@ public AwsCredentials resolveCredentials() { @Test public void testBadCredentialsConstructor() throws Exception { - Configuration conf = createConf(); - conf.set(AWS_CREDENTIALS_PROVIDER, - BadCredentialsProviderConstructor.class.getName()); - try { - createFailingFS(conf); - } catch (IOException e) { - GenericTestUtils.assertExceptionContains(InstantiationIOException.CONSTRUCTOR_EXCEPTION, e); + Configuration conf = createConf(BadCredentialsProviderConstructor.class); + final InstantiationIOException ex = + intercept(InstantiationIOException.class, CONSTRUCTOR_EXCEPTION, () -> + createFailingFS(conf)); + if (InstantiationIOException.Kind.UnsupportedConstructor != ex.getKind()) { + throw ex; } } + /** + * Create a configuration bonded to the given provider classname. + * @param provider provider to bond to + * @return a configuration + */ protected Configuration createConf(String provider) { Configuration conf = new Configuration(); removeBaseAndBucketOverrides(conf, DELEGATION_TOKEN_BINDING, AWS_CREDENTIALS_PROVIDER); + conf.set(AWS_CREDENTIALS_PROVIDER, provider); + conf.set(DELEGATION_TOKEN_BINDING, ""); return conf; } + /** + * Create a configuration bonded to the given provider class. + * @param provider provider to bond to + * @return a configuration + */ + protected Configuration createConf(Class provider) { + return createConf(provider.getName()); + } + /** * Create a filesystem, expect it to fail by raising an IOException. * Raises an assertion exception if in fact the FS does get instantiated. + * The FS instance is always closed. * @param conf configuration * @throws IOException an expected exception. */ private void createFailingFS(Configuration conf) throws IOException { - S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf); - fs.listStatus(new Path("/")); - fail("Expected exception - got " + fs); + try (S3AFileSystem fs = S3ATestUtils.createTestFileSystem(conf)) { + fs.listStatus(new Path("/")); + fail("Expected exception - got " + fs); + } } - static class BadCredentialsProvider implements AwsCredentialsProvider { + /** + * Returns an invalid set of credentials.
+ */ + public static class BadCredentialsProvider implements AwsCredentialsProvider { @SuppressWarnings("unused") - BadCredentialsProvider(Configuration conf) { + public BadCredentialsProvider(Configuration conf) { } @Override @@ -137,34 +164,60 @@ public AwsCredentials resolveCredentials() { @Test public void testBadCredentials() throws Exception { - Configuration conf = new Configuration(); - conf.set(AWS_CREDENTIALS_PROVIDER, BadCredentialsProvider.class.getName()); - try { - createFailingFS(conf); - } catch (AccessDeniedException e) { - // expected - } catch (AWSServiceIOException e) { - GenericTestUtils.assertExceptionContains( - "UnrecognizedClientException", e); - // expected - } + Configuration conf = createConf(BadCredentialsProvider.class); + intercept(AccessDeniedException.class, "", () -> + createFailingFS(conf)); } + /** + * Test using the anonymous credential provider with the public csv + * test file; if the test file path is unset then it will be skipped. + */ @Test public void testAnonymousProvider() throws Exception { - Configuration conf = new Configuration(); - conf.set(AWS_CREDENTIALS_PROVIDER, - AnonymousAWSCredentialsProvider.class.getName()); + Configuration conf = createConf(AnonymousAWSCredentialsProvider.class); Path testFile = getCSVTestPath(conf); try (FileSystem fs = FileSystem.newInstance(testFile.toUri(), conf)) { - assertNotNull("S3AFileSystem instance must not be null", fs); - assertTrue("FileSystem must be the instance of S3AFileSystem", fs instanceof S3AFileSystem); + Assertions.assertThat(fs) + .describedAs("Filesystem") + .isNotNull(); FileStatus stat = fs.getFileStatus(testFile); - assertNotNull("FileStatus with qualified path must not be null", stat); assertEquals( "The qualified path returned by getFileStatus should be same as the original file", testFile, stat.getPath()); } } + /** + * Create credentials via the create() method. + * They are invalid credentials, so IO will fail as access denied. + */ + @Test + public void testCredentialsWithCreateMethod() throws Exception { + Configuration conf = createConf(CredentialsProviderWithCreateMethod.class); + intercept(AccessDeniedException.class, "", () -> + createFailingFS(conf)); + } + + /** + * Credentials via the create() method. + */ + public static class CredentialsProviderWithCreateMethod implements AwsCredentialsProvider { + + public static AwsCredentialsProvider create() { + LOG.info("creating CredentialsProviderWithCreateMethod"); + return new CredentialsProviderWithCreateMethod(); + } + + /** Private: cannot be created directly. */ + private CredentialsProviderWithCreateMethod() { + } + + @Override + public AwsCredentials resolveCredentials() { + return AwsBasicCredentials.create("bad_key", "bad_secret"); + } + + } + } From 644b390e9e491dbb2b3caf8cbcd0b35bc9e2a11f Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Wed, 16 Aug 2023 14:11:42 +0100 Subject: [PATCH 17/20] HADOOP-18820. 
style and line endings Change-Id: If7ca8bb9c89794e079e0de4c9de46da8ac565032 --- .../hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md | 4 ++-- .../apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md index f85863abdddcb..fc0eb3e3666e3 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/index.md @@ -262,7 +262,7 @@ a warning has been printed since Hadoop 2.8 whenever such a URL was used. Session token, when using org.apache.hadoop.fs.s3a.TemporaryAWSCredentialsProvider as one of the providers. - + fs.s3a.aws.credentials.provider @@ -339,7 +339,7 @@ on which values are set. S3A can be configured to obtain client authentication providers from classes which integrate with the AWS SDK by implementing the `software.amazon.awssdk.auth.credentials.AwsCredentialsProvider` -interface. +interface. This is done by listing the implementation classes, in order of preference, in the configuration option `fs.s3a.aws.credentials.provider`. In previous hadoop releases, providers were required to diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java index 1876a169d48fc..bccbe79c2a48b 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/ITestS3AAWSCredentialsProvider.java @@ -202,7 +202,7 @@ public void testCredentialsWithCreateMethod() throws Exception { /** * Credentials via the create() method. 
*/ - public static class CredentialsProviderWithCreateMethod implements AwsCredentialsProvider { + public static final class CredentialsProviderWithCreateMethod implements AwsCredentialsProvider { public static AwsCredentialsProvider create() { LOG.info("creating CredentialsProviderWithCreateMethod"); From f7b2638c38d414e5361a85ab7fab24f94ee808e1 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 17 Aug 2023 16:43:57 +0100 Subject: [PATCH 18/20] HADOOP-18820: cut v1ProviderReferenced; update audit docs Cut unused v1ProviderReferenced() method audit doc update - auditing is enabled again - new fs.s3a.audit.execution.interceptors option - how to log httpclient Change-Id: Ib0c5b86407e059d53c469151cf3d89ede65e9116 --- .../hadoop/fs/s3a/impl/V2Migration.java | 16 +------ .../markdown/tools/hadoop-aws/auditing.md | 46 +++++++++++++++---- .../tools/hadoop-aws/aws_sdk_upgrade.md | 8 ++++ 3 files changed, 45 insertions(+), 25 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java index 51ccdb857a149..58db60d606eac 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java @@ -45,30 +45,16 @@ private V2Migration() { } public static final Logger SDK_V2_UPGRADE_LOG = LoggerFactory.getLogger(SDK_V2_UPGRADE_LOG_NAME); - private static final LogExactlyOnce WARN_OF_DIRECTLY_REFERENCED_CREDENTIAL_PROVIDER = - new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - private static final LogExactlyOnce WARN_OF_REQUEST_HANDLERS = new LogExactlyOnce(SDK_V2_UPGRADE_LOG); - /** - * Notes an AWS V1 credential provider being referenced directly. - * @param name name of the credential provider - */ - public static void v1ProviderReferenced(String name) { - WARN_OF_DIRECTLY_REFERENCED_CREDENTIAL_PROVIDER.debug( - "Directly referencing AWS SDK V1 credential provider {}. AWS SDK V1 credential " - + "providers will be removed once S3A is upgraded to SDK V2", name); - } - - /** * Notes use of request handlers. */ public static void v1RequestHandlersUsed() { WARN_OF_REQUEST_HANDLERS.warn( "The request handler interface has changed in AWS SDK V2, use exception interceptors " - + "once S3A is upgraded to SDK V2"); + + "now that S3A is upgraded to SDK V2"); } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md index 9f107809eca6f..86acceadc8eaf 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md @@ -22,7 +22,7 @@ and inside the AWS S3 SDK, immediately before the request is executed. The full architecture is covered in [Auditing Architecture](auditing_architecture.html); this document covers its use. -## Important: Auditing is disabled by default +## Important: Auditing is currently enabled Due to a memory leak from the use of `ThreadLocal` fields, this auditing feature leaked memory as S3A filesystem instances were created and deleted. See [HADOOP-18091](https://issues.apache.org/jira/browse/HADOOP-18091) _S3A audi To avoid these memory leaks, auditing was disabled by default in the hadoop 3.3.2 release. -As these memory leaks have now been fixed, auditing has been re-enabled.
+As these memory leaks have now been fixed, auditing has been re-enabled in Hadoop 3.3.5+. To disable it, set `fs.s3a.audit.enabled` to `false`. @@ -77,7 +77,7 @@ ideally even identifying the process/job generating load. ## Using Auditing -Auditing is disabled by default. +Auditing is enabled by default. When auditing enabled, a Logging Auditor will annotate the S3 logs through a custom HTTP Referrer header in requests made to S3. Other auditor classes may be used instead. @@ -88,7 +88,7 @@ Other auditor classes may be used instead. |--------|---------|---------------| | `fs.s3a.audit.enabled` | Is auditing enabled? | `true` | | `fs.s3a.audit.service.classname` | Auditor classname | `org.apache.hadoop.fs.s3a.audit.impl.LoggingAuditor` | -| `fs.s3a.audit.request.handlers` | List of extra subclasses of AWS SDK RequestHandler2 to include in handler chain | `""` | +| `fs.s3a.audit.execution.interceptors` | Implementations of AWS v2 SDK `ExecutionInterceptor` to include in handler chain | `""` | | `fs.s3a.audit.referrer.enabled` | Logging auditor to publish the audit information in the HTTP Referrer header | `true` | | `fs.s3a.audit.referrer.filter` | List of audit fields to filter | `""` | | `fs.s3a.audit.reject.out.of.span.operations` | Auditor to reject operations "outside of a span" | `false` | @@ -96,14 +96,14 @@ Other auditor classes may be used instead. ### Disabling Auditing. -In this release of Hadoop, auditing is disabled. +In this release of Hadoop, auditing is enabled by default. This can be explicitly set globally or for specific buckets ```xml <property> <name>fs.s3a.audit.enabled</name> - <value>false</value> + <value>true</value> </property> ``` @@ -162,6 +162,23 @@ correlate access by S3 clients to the actual operations taking place. Note: this logging is described as "Best Effort". There's no guarantee as to when logs arrive. +### Integration with AWS SDK request processing + +The auditing component inserts itself into the AWS SDK request processing +code, so it can attach the referrer header. + +It is possible to declare extra classes to add to the processing chain, +all of which must implement the interface `software.amazon.awssdk.core.interceptor.ExecutionInterceptor`. + +The list of classes is set in the configuration option `fs.s3a.audit.execution.interceptors`. + +Before the upgrade to the V2 SDK, a list of extra subclasses of the AWS SDK `com.amazonaws.handlers.RequestHandler2` +class could be declared in the option `fs.s3a.audit.request.handlers`; +these would be wired up into the V1 request processing pipeline. + +This option is now ignored completely, other than printing a warning message the first time a filesystem is created with a non-empty value. + + ### Rejecting out-of-span operations The logging auditor can be configured to raise an exception whenever @@ -201,8 +218,8 @@ The HTTP referrer header is attached by the logging auditor. If the S3 Bucket is configured to log requests to another bucket, then these logs entries will include the audit information _as the referrer_. -This can be parsed (consult AWS documentation for a regular expression) -and the http referrer header extracted. +The S3 Server log entries can be parsed (consult AWS documentation for a regular expression) +and the http referrer header extracted. ``` https://audit.example.org/hadoop/1/op_rename/3c0d9b7e-2a63-43d9-a220-3c574d768ef3-3/ @@ -242,13 +259,14 @@ If any of the field values were `null`, the field is omitted.
_Notes_ -* Thread IDs are from the current thread in the JVM, so can be compared to those in````````` +* Thread IDs are from the current thread in the JVM, so can be compared to those in Log4J logs. They are never unique. * Task Attempt/Job IDs are only ever set during operations involving the S3A committers, specifically - all operations excecuted by the committer. + all operations executed by the committer. Operations executed in the same thread as the committer's instantiation _may_ also report the IDs, even if they are unrelated to the actual task. Consider them "best effort". +Thread IDs are generated as follows: ```java Long.toString(Thread.currentThread().getId()) ``` @@ -269,6 +287,8 @@ This is why the span ID is always passed in as part of the URL, rather than just an HTTP query parameter: even if the header is chopped, the span ID will always be present. +As of August 2023, this header is not collected in AWS CloudTrail, only in S3 Server logs. + ## Privacy Implications of HTTP Referrer auditing When the S3A client makes requests of an S3 bucket, the auditor @@ -423,6 +443,12 @@ log4j.logger.org.apache.hadoop.fs.s3a.audit=TRACE This is very noisy and not recommended in normal operation. +If logging of HTTP IO is enabled then the "referer" header is printed as part of every request: +``` +log4j.logger.org.apache.http=DEBUG +log4j.logger.software.amazon.awssdk.thirdparty.org.apache.http.client.HttpClient=DEBUG +``` + ## Integration with S3A Committers Work submitted through the S3A committer will have the job (query) ID associated diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md index 6a82ec85965e1..9ac2a780ca233 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/aws_sdk_upgrade.md @@ -371,3 +371,11 @@ the interface `org.apache.hadoop.fs.s3a.audit.AWSAuditEventCallbacks` Examine the interface and associated implementations to see how to migrate. + +The option `fs.s3a.audit.request.handlers` to declare a list of v1 SDK +`com.amazonaws.handlers.RequestHandler2` implementations to include +in the AWS request chain is no longer supported: a warning is printed +and the value ignored. + +The V2 SDK equivalent, classes implementing `software.amazon.awssdk.core.interceptor.ExecutionInterceptor`, +can be declared in the configuration option `fs.s3a.audit.execution.interceptors`. From afc678715eaac4085fbe0ffe140b75da47c81bc5 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Thu, 17 Aug 2023 17:33:46 +0100 Subject: [PATCH 19/20] HADOOP-18820. Audit handler migration.
v1 conf load uses getTrimmed() and logs the list of values; v2 ExecutionInterceptors will, if they implement Configurable, get the auditor Configuration Change-Id: Ic0d49cbceb4a0c9fed9013c9a605f9b240801d73 --- .../s3a/audit/impl/ActiveAuditManagerS3A.java | 21 +++++++++++++------ .../hadoop/fs/s3a/impl/V2Migration.java | 8 ++++--- .../markdown/tools/hadoop-aws/auditing.md | 3 +++ .../fs/s3a/audit/ITestAuditManager.java | 10 +++++++-- .../audit/SimpleAWSExecutionInterceptor.java | 19 ++++++++++++++++- 5 files changed, 49 insertions(+), 12 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java index a45d19dfa000b..9dd04af68e8a9 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/audit/impl/ActiveAuditManagerS3A.java @@ -33,6 +33,8 @@ import software.amazon.awssdk.http.SdkHttpRequest; import software.amazon.awssdk.http.SdkHttpResponse; import software.amazon.awssdk.transfer.s3.progress.TransferListener; + +import org.apache.hadoop.conf.Configurable; import org.apache.hadoop.util.Preconditions; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -404,19 +406,26 @@ public List<ExecutionInterceptor> createExecutionInterceptors() List<ExecutionInterceptor> executionInterceptors = new ArrayList<>(); executionInterceptors.add(this); - final String handlers = getConfig().get(AUDIT_REQUEST_HANDLERS); - if (handlers != null) { - V2Migration.v1RequestHandlersUsed(); + final String handlers = getConfig().getTrimmed(AUDIT_REQUEST_HANDLERS, ""); + if (!handlers.isEmpty()) { + // warn and ignore v1 handlers. + V2Migration.v1RequestHandlersUsed(handlers); } - // TODO: should we remove this and use Global/Service interceptors, see: - // https://sdk.amazonaws.com/java/api/latest/software/amazon/awssdk/core/interceptor/ExecutionInterceptor.html + // V2 SDK supports global/service interceptors, but they need to be configured on the + // classpath and don't get the filesystem/job configuration passed down. final Class<?>[] interceptors = getConfig().getClasses(AUDIT_EXECUTION_INTERCEPTORS); if (interceptors != null) { for (Class<?> handler : interceptors) { try { + LOG.debug("Adding interceptor of class {}", handler); Constructor<?> ctor = handler.getConstructor(); - executionInterceptors.add((ExecutionInterceptor) ctor.newInstance()); + final ExecutionInterceptor interceptor = (ExecutionInterceptor) ctor.newInstance(); + if (interceptor instanceof Configurable) { + // pass in the configuration.
+ ((Configurable) interceptor).setConf(getConfig()); + } + executionInterceptors.add(interceptor); } catch (ExceptionInInitializerError e) { throw FutureIO.unwrapInnerException(e); } catch (Exception e) { diff --git a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java index 58db60d606eac..bc9b0e49a37b0 100644 --- a/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java +++ b/hadoop-tools/hadoop-aws/src/main/java/org/apache/hadoop/fs/s3a/impl/V2Migration.java @@ -23,6 +23,7 @@ import org.apache.hadoop.fs.store.LogExactlyOnce; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; import static org.apache.hadoop.fs.s3a.impl.InternalConstants.SDK_V2_UPGRADE_LOG_NAME; /** @@ -50,11 +51,12 @@ private V2Migration() { } /** * Notes use of request handlers. + * @param handlers handlers declared */ - public static void v1RequestHandlersUsed() { + public static void v1RequestHandlersUsed(final String handlers) { WARN_OF_REQUEST_HANDLERS.warn( - "The request handler interface has changed in AWS SDK V2, use exception interceptors " - + "now S3A is upgraded to SDK V2"); + "Ignoring V1 SDK request handlers set in {}: {}", + AUDIT_REQUEST_HANDLERS, handlers); } } diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md index 86acceadc8eaf..3b7c8a2327785 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md @@ -172,6 +172,9 @@ all of which must implement the interface `software.amazon.awssdk.core.intercept The list of classes is set in the configuration option `fs.s3a.audit.execution.interceptors`. +Any class in the list which implements `org.apache.hadoop.conf.Configurable` will have +`Configurable.setConf()` called with the filesystem configuration passed down. + Before the upgrade to the V2 SDK, a list of extra subclasses of the AWS SDK `com.amazonaws.handlers.RequestHandler2` class could be declared in the option `fs.s3a.audit.request.handlers`; these would be wired up into the V1 request processing pipeline. 
diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java index bd60165ebe42e..ea7a1a34da735 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/ITestAuditManager.java @@ -34,6 +34,7 @@ import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.enableLoggingAuditor; import static org.apache.hadoop.fs.s3a.audit.AuditTestSupport.resetAuditOptions; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_EXECUTION_INTERCEPTORS; +import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.AUDIT_REQUEST_HANDLERS; import static org.apache.hadoop.fs.s3a.audit.S3AAuditConstants.UNAUDITED_OPERATION; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.assertThatStatisticCounter; import static org.apache.hadoop.fs.statistics.IOStatisticAssertions.lookupCounterStatistic; @@ -59,6 +60,7 @@ public Configuration createConfiguration() { enableLoggingAuditor(conf); conf.set(AUDIT_EXECUTION_INTERCEPTORS, SimpleAWSExecutionInterceptor.CLASS); + conf.set(AUDIT_REQUEST_HANDLERS, "not-valid-class"); return conf; } @@ -114,8 +116,8 @@ public void testInvokeOutOfSpanRejected() throws Throwable { } @Test - public void testRequestHandlerBinding() throws Throwable { - describe("Verify that extra request handlers can be added and that they" + public void testExecutionInterceptorBinding() throws Throwable { + describe("Verify that extra ExecutionInterceptors can be added and that they" + " will be invoked during request execution"); final long baseCount = SimpleAWSExecutionInterceptor.getInvocationCount(); final S3AFileSystem fs = getFileSystem(); @@ -131,5 +133,9 @@ .isGreaterThan(exec0); assertThatStatisticCounter(iostats(), AUDIT_FAILURE.getSymbol()) .isZero(); + Assertions.assertThat(SimpleAWSExecutionInterceptor.getStaticConf()) + .describedAs("configuration of SimpleAWSExecutionInterceptor") + .isNotNull() + .isSameAs(fs.getConf()); } } diff --git a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java index 8014b05187387..bf9b90bcdf31e 100644 --- a/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java +++ b/hadoop-tools/hadoop-aws/src/test/java/org/apache/hadoop/fs/s3a/audit/SimpleAWSExecutionInterceptor.java @@ -24,17 +24,23 @@ import software.amazon.awssdk.core.interceptor.ExecutionAttributes; import software.amazon.awssdk.core.interceptor.ExecutionInterceptor; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.conf.Configured; + /** * Simple AWS interceptor to verify dynamic loading of extra * execution interceptors during auditing setup. * The invocation counter tracks the count of calls to * {@link #beforeExecution}. */ -public final class SimpleAWSExecutionInterceptor implements ExecutionInterceptor { +public final class SimpleAWSExecutionInterceptor extends Configured + implements ExecutionInterceptor { public static final String CLASS = "org.apache.hadoop.fs.s3a.audit.SimpleAWSExecutionInterceptor"; + private static Configuration staticConf; + /** Count of invocations.
*/ private static final AtomicLong INVOCATIONS = new AtomicLong(0); @@ -42,6 +48,7 @@ public final class SimpleAWSExecutionInterceptor implements ExecutionInterceptor public void beforeExecution(Context.BeforeExecution context, ExecutionAttributes executionAttributes) { INVOCATIONS.incrementAndGet(); + staticConf = getConf(); } /** @@ -51,4 +58,14 @@ public void beforeExecution(Context.BeforeExecution context, public static long getInvocationCount() { return INVOCATIONS.get(); } + + /** + * Get the static conf, which is set to the config of the + * last interceptor invoked. + * @return the static configuration. + */ + + public static Configuration getStaticConf() { + return staticConf; + } } From d89d95e4bbd91a6b637760bc006f654331b82845 Mon Sep 17 00:00:00 2001 From: Steve Loughran Date: Fri, 18 Aug 2023 11:15:35 +0100 Subject: [PATCH 20/20] HADOOP-18820. EOLs in auditing.md Change-Id: Ib6125d34fa51a4af26f679339598241a6fb42fc8 --- .../src/site/markdown/tools/hadoop-aws/auditing.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md index 3b7c8a2327785..9d424bc2d8c05 100644 --- a/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md +++ b/hadoop-tools/hadoop-aws/src/site/markdown/tools/hadoop-aws/auditing.md @@ -222,7 +222,7 @@ If the S3 Bucket is configured to log requests to another bucket, then these log entries will include the audit information _as the referrer_. The S3 Server log entries can be parsed (consult AWS documentation for a regular expression) -and the http referrer header extracted. +and the http referrer header extracted. ``` https://audit.example.org/hadoop/1/op_rename/3c0d9b7e-2a63-43d9-a220-3c574d768ef3-3/ @@ -262,14 +262,15 @@ If any of the field values were `null`, the field is omitted. _Notes_ -* Thread IDs are from the current thread in the JVM, so can be compared to those in +* Thread IDs are from the current thread in the JVM, so can be compared to those in Log4J logs. They are never unique. * Task Attempt/Job IDs are only ever set during operations involving the S3A committers, specifically all operations executed by the committer. Operations executed in the same thread as the committer's instantiation _may_ also report the IDs, even if they are unrelated to the actual task. Consider them "best effort". -Thread IDs are generated as follows: +Thread IDs are generated as follows: + ```java Long.toString(Thread.currentThread().getId()) ```
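Taken together, patches 18 and 19 define the new extension point: each class named in `fs.s3a.audit.execution.interceptors` is instantiated through its public no-arg constructor by `ActiveAuditManagerS3A.createExecutionInterceptors()`, and, if it implements `org.apache.hadoop.conf.Configurable`, is handed the filesystem configuration via `setConf()` before any request is issued. A minimal sketch of a third-party interceptor using this wiring follows; the package and class name are hypothetical illustrations, not part of these patches:

```java
package org.example.s3a; // hypothetical package, for illustration only

import software.amazon.awssdk.core.interceptor.Context;
import software.amazon.awssdk.core.interceptor.ExecutionAttributes;
import software.amazon.awssdk.core.interceptor.ExecutionInterceptor;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;

/**
 * Hypothetical extra audit interceptor.
 * Extending Configured means the audit manager will call setConf()
 * with the filesystem configuration before requests execute.
 * A public no-arg constructor is required, as instantiation is via
 * Class.getConstructor().newInstance().
 */
public class ExampleAuditInterceptor extends Configured
    implements ExecutionInterceptor {

  @Override
  public void beforeExecution(Context.BeforeExecution context,
      ExecutionAttributes executionAttributes) {
    // The filesystem configuration passed down by the audit manager;
    // it may be consulted here to tune interceptor behaviour.
    Configuration conf = getConf();
    // ... interceptor-specific logic, e.g. counting or logging requests ...
  }
}
```

Such a class would then be registered by setting `fs.s3a.audit.execution.interceptors` to its fully qualified name (here `org.example.s3a.ExampleAuditInterceptor`) in the filesystem configuration, as described in the auditing.md changes above; `SimpleAWSExecutionInterceptor` in the test diff is the in-tree example of the same pattern.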