Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,11 @@ at locations that better optimize for object storage.

- Introduced bootstrap command options to specify custom schema files for database initialization.

- Added refresh credentials endpoint configuration to LoadTableResponse for AWS, Azure, and GCP. Enabling
automatic storage credential refresh per table on the client side. Java client version >= 1.8.0 is required.
The endpoint path is always returned when using vended credentials, but clients must enable the
Comment on lines +84 to +86
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We will need to document this. Unfortunately, the best place to doc this is a page for storage setting and credential vending, which doesn't exist yet. Here is the issue filed long time ago, #1325. Not a blocker for this PR though.

refresh-credentials flag for the desired storage provider.

### Changes

- Polaris Management API clients must be prepared to deal with new attributes in `AwsStorageConfigInfo` objects.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1600,7 +1600,8 @@ private void revokeGrantRecord(
PolarisEntityType entityType,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {

// get meta store session we should be using
BasePersistence ms = callCtx.getMetaStore();
Expand Down Expand Up @@ -1642,7 +1643,8 @@ private void revokeGrantRecord(
callCtx.getRealmConfig(),
allowListOperation,
allowedReadLocations,
allowedWriteLocations);
allowedWriteLocations,
refreshCredentialsEndpoint);
return new ScopedCredentialsResult(accessConfig);
} catch (Exception ex) {
return new ScopedCredentialsResult(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -327,15 +327,17 @@ public ScopedCredentialsResult getSubscopedCredsForEntity(
PolarisEntityType entityType,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
return delegate.getSubscopedCredsForEntity(
callCtx,
catalogId,
entityId,
entityType,
allowListOperation,
allowedReadLocations,
allowedWriteLocations);
allowedWriteLocations,
refreshCredentialsEndpoint);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2022,7 +2022,8 @@ private PolarisEntityResolver resolveSecurableToRoleGrant(
PolarisEntityType entityType,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {

// get meta store session we should be using
TransactionalPersistence ms = ((TransactionalPersistence) callCtx.getMetaStore());
Expand Down Expand Up @@ -2055,7 +2056,8 @@ private PolarisEntityResolver resolveSecurableToRoleGrant(
callCtx.getRealmConfig(),
allowListOperation,
allowedReadLocations,
allowedWriteLocations);
allowedWriteLocations,
refreshCredentialsEndpoint);
return new ScopedCredentialsResult(accessConfig);
} catch (Exception ex) {
return new ScopedCredentialsResult(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@ public String genericTables(Namespace ns) {
"polaris", "v1", prefix, "namespaces", RESTUtil.encodeNamespace(ns), "generic-tables");
}

public String credentialsPath(TableIdentifier ident) {
return SLASH.join(
"v1",
prefix,
"namespaces",
RESTUtil.encodeNamespace(ident.namespace()),
"tables",
RESTUtil.encodeString(ident.name()),
"credentials");
Comment on lines +60 to +68
Copy link
Contributor

@dimas-b dimas-b Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just to confirm: relative URIs are correct in this case, right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, from what I understand the recent version of the client automatically appends the base URI if a relative path is returned from the server.

}

public String genericTable(TableIdentifier ident) {
return SLASH.join(
"polaris",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.polaris.core.storage;

import jakarta.annotation.Nonnull;
import java.util.Optional;
import java.util.Set;
import org.apache.polaris.core.PolarisCallContext;
import org.apache.polaris.core.entity.PolarisEntityType;
Expand All @@ -37,6 +38,10 @@ public interface PolarisCredentialVendor {
* allowedWriteLocations
* @param allowedReadLocations a set of allowed to read locations
* @param allowedWriteLocations a set of allowed to write locations
* @param refreshCredentialsEndpoint an optional endpoint to use for refreshing credentials. If
* supported by the storage type it will be returned to the client in the appropriate
* properties. The endpoint may be relative to the base URI and the client is responsible for
Copy link
Contributor

@singhpk234 singhpk234 Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we return anything except relative path ? my understanding is base uri is always catalog uri https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/VendedCredentialsProvider.java#L92 is this statement in intentionally open ended since spec is not explicit ?

Copy link
Contributor Author

@jasonf20 jasonf20 Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

At one point the client required full paths, now it works with partial paths. I'm not sure if the spec identifies one as the correct method over the other so I left it like this. If one is definitely the "correct" way I can update the comment.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jasonf20 : do you know the Iceberg version / PR where relative paths started working?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It was linked in the previous PR. #1164 (comment)

Seems like it's since 1.8.0

Copy link
Contributor

@singhpk234 singhpk234 Aug 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh no this made a lot of things super tricky, i checked iceberg python for refresh keyword but i didn't any prs for that so i am assuming that it doesn't support, taking only the iceberg java sdk reference
if we always send relative url it only works for >= 1.8
fails for < 1.8
I know there is a header that the java sdk sends which has SDK version X-Client-Version header we may want to salvage this now on when to return the absolute vs relative, https://github.com/apache/iceberg/blob/main/core/src/main/java/org/apache/iceberg/rest/HTTPClient.java#L77

seems like the only broken release would be 1.7

Its a bit surprising to me, but i think for now we can park this by just documenting this gotcha, WDYT @dimas-b ?

Copy link
Contributor

@dimas-b dimas-b Aug 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Getting full URI is a bit trickier - as discussed in #1164.

From my POV, let's merge this "as is" and open a GH issue that it does not work properly for Iceberg < 1.8. When someone has time, let's improve.

Getting this feature enabled for Iceberg clients >= 1.8 is valuable, I think.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since old client version behave differently, I think we ought to have a feature flag now... just in case someone runs into client incompatibility issues and wants to disable credential refresh properties completely (i.e. revert to old Polaris behaviour). WDYT?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would be ideal IMHO (specially now), but i think the jobs without us sending the credentials endpoint would still have failed because my understanding is we would have not made the /credentials call unless the creds in loadTable expires https://github.com/apache/iceberg/blob/main/aws/src/main/java/org/apache/iceberg/aws/s3/VendedCredentialsProvider.java#L63, its just we might see an HTTP error rather than the creds expired exception.
But agree, we might want to have a feature flag to fallback to the old behaviour if we unexpectedly fail.

Copy link
Contributor Author

@jasonf20 jasonf20 Aug 26, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think a feature flag is needed.

  1. The client is already broken, if they reach the need to refresh credentials it already fails today.
  2. A feature flag will not solve the problem. Enabling it will fix the issue for some and create a different issue for others, so you'll never be able to enable it anymore than you can without the flag.
  3. Clients can simply upgrade to 1.8.0 to get a working version. Their current version doesn't work anyway so they can upgrade at their own time when this becomes important to them.
  4. If you would like we can remove the refresh_credentials_enabled property and let the client set that one in his catalog settings. That way he can control if this gets used or not. This would be better than a flag and is controlled client side. not server side.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added a commit that does 4.

* handling the relative path
* @return an enum map containing the scoped credentials
*/
@Nonnull
Expand All @@ -47,5 +52,6 @@ ScopedCredentialsResult getSubscopedCredsForEntity(
PolarisEntityType entityType,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations);
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint);
}
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

import jakarta.annotation.Nonnull;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.apache.polaris.core.config.RealmConfig;

Expand Down Expand Up @@ -55,13 +56,18 @@ public String getStorageIdentifierOrId() {
* locations
* @param allowedReadLocations a set of allowed to read locations
* @param allowedWriteLocations a set of allowed to write locations
* @param refreshCredentialsEndpoint an optional endpoint to use for refreshing credentials. If
* supported by the storage type it will be returned to the client in the appropriate
* properties. The endpoint may be relative to the base URI and the client is responsible for
* handling the relative path
* @return An enum map including the scoped credentials
*/
public abstract AccessConfig getSubscopedCreds(
@Nonnull RealmConfig realmConfig,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations);
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint);

/**
* Validate access for the provided operation actions and locations.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
*/
package org.apache.polaris.core.storage;

import org.apache.iceberg.aws.AwsClientProperties;
import org.apache.iceberg.gcp.GCPProperties;

/**
* A subset of Iceberg catalog properties recognized by Polaris.
*
Expand All @@ -39,6 +42,12 @@ public enum StorageAccessProperty {
Boolean.class, "s3.path-style-access", "whether to use S3 path style access", false),
CLIENT_REGION(
String.class, "client.region", "region to configure client for making requests to AWS"),
AWS_REFRESH_CREDENTIALS_ENDPOINT(
String.class,
AwsClientProperties.REFRESH_CREDENTIALS_ENDPOINT,
"the endpoint to load vended credentials for a table from the catalog",
false,
false),

GCS_ACCESS_TOKEN(String.class, "gcs.oauth2.token", "the gcs scoped access token"),
GCS_ACCESS_TOKEN_EXPIRES_AT(
Expand All @@ -47,11 +56,23 @@ public enum StorageAccessProperty {
"the time the gcs access token expires, in milliseconds",
true,
true),
GCS_REFRESH_CREDENTIALS_ENDPOINT(
String.class,
GCPProperties.GCS_OAUTH2_REFRESH_CREDENTIALS_ENDPOINT,
"the endpoint to load vended credentials for a table from the catalog",
false,
false),

// Currently not using ACCESS TOKEN as the ResolvingFileIO is using ADLSFileIO for azure case and
// it expects for SAS
AZURE_ACCESS_TOKEN(String.class, "", "the azure scoped access token"),
AZURE_SAS_TOKEN(String.class, "adls.sas-token.", "an azure shared access signature token"),
AZURE_REFRESH_CREDENTIALS_ENDPOINT(
String.class,
"adls.refresh-credentials-endpoint",
"the endpoint to load vended credentials for a table from the catalog",
false,
false),
EXPIRATION_TIME(
Long.class,
"expiration-time",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,8 @@ public AccessConfig getSubscopedCreds(
@Nonnull RealmConfig realmConfig,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
int storageCredentialDurationSeconds =
realmConfig.getConfig(STORAGE_CREDENTIAL_DURATION_SECONDS);
AwsStorageConfigurationInfo storageConfig = config();
Expand Down Expand Up @@ -120,6 +121,11 @@ public AccessConfig getSubscopedCreds(
accessConfig.put(StorageAccessProperty.CLIENT_REGION, region);
}

refreshCredentialsEndpoint.ifPresent(
endpoint -> {
accessConfig.put(StorageAccessProperty.AWS_REFRESH_CREDENTIALS_ENDPOINT, endpoint);
});

URI endpointUri = storageConfig.getEndpointUri();
if (endpointUri != null) {
accessConfig.put(StorageAccessProperty.AWS_ENDPOINT, endpointUri.toString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@
import java.time.temporal.ChronoUnit;
import java.util.HashSet;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import org.apache.polaris.core.config.RealmConfig;
import org.apache.polaris.core.storage.AccessConfig;
Expand Down Expand Up @@ -76,7 +77,8 @@ public AccessConfig getSubscopedCreds(
@Nonnull RealmConfig realmConfig,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
String loc =
!allowedWriteLocations.isEmpty()
? allowedWriteLocations.stream().findAny().orElse(null)
Expand Down Expand Up @@ -169,15 +171,24 @@ public AccessConfig getSubscopedCreds(
String.format("Endpoint %s not supported", location.getEndpoint()));
}

return toAccessConfig(sasToken, storageDnsName, sanitizedEndTime.toInstant());
return toAccessConfig(
sasToken, storageDnsName, sanitizedEndTime.toInstant(), refreshCredentialsEndpoint);
}

@VisibleForTesting
static AccessConfig toAccessConfig(String sasToken, String storageDnsName, Instant expiresAt) {
static AccessConfig toAccessConfig(
String sasToken,
String storageDnsName,
Instant expiresAt,
Optional<String> refreshCredentialsEndpoint) {
AccessConfig.Builder accessConfig = AccessConfig.builder();
handleAzureCredential(accessConfig, sasToken, storageDnsName);
accessConfig.put(
StorageAccessProperty.EXPIRATION_TIME, String.valueOf(expiresAt.toEpochMilli()));
refreshCredentialsEndpoint.ifPresent(
endpoint -> {
accessConfig.put(StorageAccessProperty.AZURE_REFRESH_CREDENTIALS_ENDPOINT, endpoint);
});
return accessConfig.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,8 @@ public AccessConfig getOrGenerateSubScopeCreds(
@Nonnull PolarisEntity polarisEntity,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
if (!isTypeSupported(polarisEntity.getType())) {
callCtx
.getDiagServices()
Expand All @@ -117,7 +118,8 @@ public AccessConfig getOrGenerateSubScopeCreds(
polarisEntity,
allowListOperation,
allowedReadLocations,
allowedWriteLocations);
allowedWriteLocations,
refreshCredentialsEndpoint);
LOGGER.atDebug().addKeyValue("key", key).log("subscopedCredsCache");
Function<StorageCredentialCacheKey, StorageCredentialCacheEntry> loader =
k -> {
Expand All @@ -130,7 +132,8 @@ public AccessConfig getOrGenerateSubScopeCreds(
polarisEntity.getType(),
k.allowedListAction(),
k.allowedReadLocations(),
k.allowedWriteLocations());
k.allowedWriteLocations(),
k.refreshCredentialsEndpoint());
if (scopedCredentialsResult.isSuccess()) {
long maxCacheDurationMs = maxCacheDurationMs(callCtx.getRealmConfig());
return new StorageCredentialCacheEntry(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
package org.apache.polaris.core.storage.cache;

import jakarta.annotation.Nullable;
import java.util.Optional;
import java.util.Set;
import org.apache.polaris.core.entity.PolarisEntity;
import org.apache.polaris.core.entity.PolarisEntityConstants;
Expand Down Expand Up @@ -47,12 +48,16 @@ public interface StorageCredentialCacheKey {
@Value.Parameter(order = 6)
Set<String> allowedWriteLocations();

@Value.Parameter(order = 7)
Optional<String> refreshCredentialsEndpoint();

static StorageCredentialCacheKey of(
String realmId,
PolarisEntity entity,
boolean allowedListAction,
Set<String> allowedReadLocations,
Set<String> allowedWriteLocations) {
Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
String storageConfigSerializedStr =
entity
.getInternalPropertiesAsMap()
Expand All @@ -63,6 +68,7 @@ static StorageCredentialCacheKey of(
storageConfigSerializedStr,
allowedListAction,
allowedReadLocations,
allowedWriteLocations);
allowedWriteLocations,
refreshCredentialsEndpoint);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Stream;
import org.apache.polaris.core.config.RealmConfig;
Expand Down Expand Up @@ -75,7 +76,8 @@ public AccessConfig getSubscopedCreds(
@Nonnull RealmConfig realmConfig,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
try {
sourceCredentials.refresh();
} catch (IOException e) {
Expand Down Expand Up @@ -112,6 +114,12 @@ public AccessConfig getSubscopedCreds(
accessConfig.put(
StorageAccessProperty.GCS_ACCESS_TOKEN_EXPIRES_AT,
String.valueOf(token.getExpirationTime().getTime()));

refreshCredentialsEndpoint.ifPresent(
endpoint -> {
accessConfig.put(StorageAccessProperty.GCS_REFRESH_CREDENTIALS_ENDPOINT, endpoint);
});

return accessConfig.build();
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import jakarta.annotation.Nonnull;
import jakarta.annotation.Nullable;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import org.apache.polaris.core.config.PolarisConfigurationStore;
import org.apache.polaris.core.config.RealmConfig;
Expand Down Expand Up @@ -197,7 +198,8 @@ public AccessConfig getSubscopedCreds(
@Nonnull RealmConfig realmConfig,
boolean allowListOperation,
@Nonnull Set<String> allowedReadLocations,
@Nonnull Set<String> allowedWriteLocations) {
@Nonnull Set<String> allowedWriteLocations,
Optional<String> refreshCredentialsEndpoint) {
return null;
}
}
Expand Down
Loading