Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
da0ede6
add etag
Apr 9, 2025
3097ffb
bump to main
Apr 10, 2025
425cbbc
Merge branch 'main' into etag
Apr 10, 2025
1480d50
update submodules
Apr 10, 2025
9b85b41
update create secret info
Apr 11, 2025
22a0387
Merge pull request #41 from lnkuiper/etag
lnkuiper Apr 11, 2025
3059c0d
Read info from S3 glob and push into OpenFileInfo
Mytherin Apr 12, 2025
0f1d510
Read etag, last_modified and file size during S3 glob and use that in…
Mytherin Apr 14, 2025
6bee4d9
Merge pull request #45 from Mytherin/s3globpushinfo
Mytherin Apr 14, 2025
b0c9960
Apply HTTPUtil patch
Mytherin May 13, 2025
e9ee26d
Merge pull request #49 from Mytherin/httputilupdate
Mytherin May 13, 2025
23824a3
Use HTTPException for HTTP errors in GetRequest and GetRangeRequest
Flogex May 13, 2025
a677e3c
Merge pull request #36 from Flogex/content-length-http-exception
Mytherin May 13, 2025
134e7ef
Better S3 authentication errors and run formatter
Mytherin May 13, 2025
d6c3eb0
Merge pull request #50 from Mytherin/s3error
Mytherin May 13, 2025
a36d4d3
Fix CMake also for build_loadable_extension
carlopi May 14, 2025
3def3e7
pragma once on httpfs_client.hpp
carlopi May 14, 2025
9e49f07
Apply changes from https://github.com/duckdb/duckdb/pull/17486
carlopi May 14, 2025
6a09982
Bump to latest duckdb
carlopi May 15, 2025
5d76c4b
Merge pull request #51 from carlopi/httpfsutils_fix_compilation
Mytherin May 15, 2025
4cf8a59
Merge pull request #52 from carlopi/httputil_port_changes
Mytherin May 15, 2025
31a64cf
Apply logger patch
Mytherin May 15, 2025
eb1b049
Merge pull request #54 from Mytherin/patch
Mytherin May 15, 2025
857a9ab
Absorb patch from https://github.com/duckdb/duckdb/pull/17527
carlopi May 19, 2025
c0f4eb5
Merge pull request #56 from carlopi/fix_update_extensions
Mytherin May 19, 2025
73b0bdc
set secret type extension
samansmink May 19, 2025
be31632
Merge pull request #57 from samansmink/set-secret-type
samansmink May 20, 2025
297ca06
Update test_secret_type.test: explicit load
carlopi May 20, 2025
217ec8e
Merge pull request #59 from duckdb/patch-test_secret_type
carlopi May 20, 2025
14b0d82
Use correct http_proto in s3 glob
Mytherin May 22, 2025
00a2697
Merge pull request #60 from Mytherin/s3fsfullproto
Mytherin May 22, 2025
85a4c0e
Bump submodules AND workflows to 1.3.0
carlopi May 23, 2025
7ce5308
Merge pull request #62 from carlopi/bump_duckdb
Mytherin May 23, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions .github/workflows/MainDistributionPipeline.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,19 +14,19 @@ concurrency:
jobs:
duckdb-stable-build:
name: Build extension binaries
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@v1.2.1
uses: duckdb/extension-ci-tools/.github/workflows/_extension_distribution.yml@main
with:
extension_name: httpfs
duckdb_version: v1.2.1
ci_tools_version: v1.2.1
duckdb_version: v1.3.0
ci_tools_version: main

duckdb-stable-deploy:
name: Deploy extension binaries
needs: duckdb-stable-build
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@v1.2.1
uses: duckdb/extension-ci-tools/.github/workflows/_extension_deploy.yml@main
secrets: inherit
with:
extension_name: httpfs
duckdb_version: v1.2.1
ci_tools_version: v1.2.1
deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
duckdb_version: v1.3.0
ci_tools_version: main
deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }}
3 changes: 2 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ build_static_extension(
extension/httpfs/hffs.cpp
extension/httpfs/s3fs.cpp
extension/httpfs/httpfs.cpp
extension/httpfs/httpfs_client.cpp
extension/httpfs/http_state.cpp
extension/httpfs/crypto.cpp
extension/httpfs/create_secret_functions.cpp
Expand All @@ -23,10 +24,10 @@ set(PARAMETERS "-warnings")
build_loadable_extension(
httpfs
${PARAMETERS}
extension/httpfs/httpfs
extension/httpfs/hffs.cpp
extension/httpfs/s3fs.cpp
extension/httpfs/httpfs.cpp
extension/httpfs/httpfs_client.cpp
extension/httpfs/http_state.cpp
extension/httpfs/crypto.cpp
extension/httpfs/create_secret_functions.cpp
Expand Down
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 2460 files
27 changes: 18 additions & 9 deletions extension/httpfs/create_secret_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ void CreateS3SecretFunctions::Register(DatabaseInstance &instance) {
RegisterCreateSecretFunction(instance, "gcs");
}

static Value MapToStruct(const Value &map){
static Value MapToStruct(const Value &map) {
auto children = MapValue::GetChildren(map);

child_list_t<Value> struct_fields;
Expand Down Expand Up @@ -109,17 +109,21 @@ unique_ptr<BaseSecret> CreateS3SecretFunctions::CreateSecretFunctionInternal(Cli
refresh = true;
secret->secret_map["refresh_info"] = MapToStruct(named_param.second);
} else {
throw InvalidInputException("Unknown named parameter passed to CreateSecretFunctionInternal: " + lower_name);
throw InvalidInputException("Unknown named parameter passed to CreateSecretFunctionInternal: " +
lower_name);
}
}

return std::move(secret);
}

CreateSecretInfo CreateS3SecretFunctions::GenerateRefreshSecretInfo(const SecretEntry &secret_entry, Value &refresh_info) {
const auto &kv_secret = dynamic_cast<const KeyValueSecret&>(*secret_entry.secret);
CreateSecretInput CreateS3SecretFunctions::GenerateRefreshSecretInfo(const SecretEntry &secret_entry,
Value &refresh_info) {
const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_entry.secret);

CreateSecretInfo result(OnCreateConflict::REPLACE_ON_CONFLICT, secret_entry.persist_type);
CreateSecretInput result;
result.on_conflict = OnCreateConflict::REPLACE_ON_CONFLICT;
result.persist_type = SecretPersistType::TEMPORARY;

result.type = kv_secret.GetType();
result.name = kv_secret.GetName();
Expand All @@ -141,7 +145,7 @@ CreateSecretInfo CreateS3SecretFunctions::GenerateRefreshSecretInfo(const Secret

//! Function that will automatically try to refresh a secret
bool CreateS3SecretFunctions::TryRefreshS3Secret(ClientContext &context, const SecretEntry &secret_to_refresh) {
const auto &kv_secret = dynamic_cast<const KeyValueSecret&>(*secret_to_refresh.secret);
const auto &kv_secret = dynamic_cast<const KeyValueSecret &>(*secret_to_refresh.secret);

Value refresh_info;
if (!kv_secret.TryGetValue("refresh_info", refresh_info)) {
Expand All @@ -153,12 +157,15 @@ bool CreateS3SecretFunctions::TryRefreshS3Secret(ClientContext &context, const S
// TODO: change SecretManager API to avoid requiring catching this exception
try {
auto res = secret_manager.CreateSecret(context, refresh_input);
auto &new_secret = dynamic_cast<const KeyValueSecret&>(*res->secret);
DUCKDB_LOG_INFO(context, "httpfs.SecretRefresh", "Successfully refreshed secret: %s, new key_id: %s", secret_to_refresh.secret->GetName(), new_secret.TryGetValue("key_id").ToString());
auto &new_secret = dynamic_cast<const KeyValueSecret &>(*res->secret);
DUCKDB_LOG_INFO(context, "Successfully refreshed secret: %s, new key_id: %s",
secret_to_refresh.secret->GetName(), new_secret.TryGetValue("key_id").ToString());
return true;
} catch (std::exception &ex) {
ErrorData error(ex);
string new_message = StringUtil::Format("Exception thrown while trying to refresh secret %s. To fix this, please recreate or remove the secret and try again. Error: '%s'", secret_to_refresh.secret->GetName(), error.Message());
string new_message = StringUtil::Format("Exception thrown while trying to refresh secret %s. To fix this, "
"please recreate or remove the secret and try again. Error: '%s'",
secret_to_refresh.secret->GetName(), error.Message());
throw Exception(error.Type(), new_message);
}
}
Expand Down Expand Up @@ -204,6 +211,7 @@ void CreateS3SecretFunctions::RegisterCreateSecretFunction(DatabaseInstance &ins
secret_type.name = type;
secret_type.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
secret_type.default_provider = "config";
secret_type.extension = "httpfs";

ExtensionUtil::RegisterSecretType(instance, secret_type);

Expand All @@ -218,6 +226,7 @@ void CreateBearerTokenFunctions::Register(DatabaseInstance &instance) {
secret_type_hf.name = HUGGINGFACE_TYPE;
secret_type_hf.deserializer = KeyValueSecret::Deserialize<KeyValueSecret>;
secret_type_hf.default_provider = "config";
secret_type_hf.extension = "httpfs";
ExtensionUtil::RegisterSecretType(instance, secret_type_hf);

// Huggingface config provider
Expand Down
125 changes: 86 additions & 39 deletions extension/httpfs/crypto.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,68 +31,81 @@ void hex256(hash_bytes &in, hash_str &out) {
}
}

//! Picks the OpenSSL AES-GCM cipher that matches the length of `key` in bytes.
//! For now, only GCM ciphers are supported.
//! \throws InternalException when the key is not 16, 24 or 32 bytes long.
const EVP_CIPHER *GetCipher(const string &key) {
	const auto key_bytes = key.size();
	if (key_bytes == 16) {
		return EVP_aes_128_gcm();
	}
	if (key_bytes == 24) {
		return EVP_aes_192_gcm();
	}
	if (key_bytes == 32) {
		return EVP_aes_256_gcm();
	}
	throw InternalException("Invalid AES key length");
}

AESGCMStateSSL::AESGCMStateSSL() : gcm_context(EVP_CIPHER_CTX_new()) {
if (!(gcm_context)) {
AESStateSSL::AESStateSSL(const std::string *key) : context(EVP_CIPHER_CTX_new()) {
if (!(context)) {
throw InternalException("AES GCM failed with initializing context");
}
}

AESGCMStateSSL::~AESGCMStateSSL() {
AESStateSSL::~AESStateSSL() {
// Clean up
EVP_CIPHER_CTX_free(gcm_context);
EVP_CIPHER_CTX_free(context);
}

bool AESGCMStateSSL::IsOpenSSL() {
return ssl;
//! Resolves the OpenSSL cipher for the configured `cipher` member and the length of `key` in bytes.
//! The cipher kind is checked first, so an unknown cipher is reported even if the key length is also invalid.
//! \throws InternalException when the key is not 16, 24 or 32 bytes long, or when `cipher` is neither GCM nor CTR.
const EVP_CIPHER *AESStateSSL::GetCipher(const string &key) {
	const auto key_bytes = key.size();

	if (cipher == GCM) {
		if (key_bytes == 16) {
			return EVP_aes_128_gcm();
		}
		if (key_bytes == 24) {
			return EVP_aes_192_gcm();
		}
		if (key_bytes == 32) {
			return EVP_aes_256_gcm();
		}
		throw InternalException("Invalid AES key length");
	}

	if (cipher == CTR) {
		if (key_bytes == 16) {
			return EVP_aes_128_ctr();
		}
		if (key_bytes == 24) {
			return EVP_aes_192_ctr();
		}
		if (key_bytes == 32) {
			return EVP_aes_256_ctr();
		}
		throw InternalException("Invalid AES key length");
	}

	throw duckdb::InternalException("Invalid Encryption/Decryption Cipher: %d", static_cast<int>(cipher));
}

void AESGCMStateSSL::GenerateRandomData(data_ptr_t data, idx_t len) {
void AESStateSSL::GenerateRandomData(data_ptr_t data, idx_t len) {
// generate random bytes for nonce
RAND_bytes(data, len);
}

void AESGCMStateSSL::InitializeEncryption(const_data_ptr_t iv, idx_t iv_len, const string *key) {
void AESStateSSL::InitializeEncryption(const_data_ptr_t iv, idx_t iv_len, const string *key) {
mode = ENCRYPT;

if (1 != EVP_EncryptInit_ex(gcm_context, GetCipher(*key), NULL, const_data_ptr_cast(key->data()), iv)) {
if (1 != EVP_EncryptInit_ex(context, GetCipher(*key), NULL, const_data_ptr_cast(key->data()), iv)) {
throw InternalException("EncryptInit failed");
}
}

void AESGCMStateSSL::InitializeDecryption(const_data_ptr_t iv, idx_t iv_len, const string *key) {
void AESStateSSL::InitializeDecryption(const_data_ptr_t iv, idx_t iv_len, const string *key) {
mode = DECRYPT;

if (1 != EVP_DecryptInit_ex(gcm_context, GetCipher(*key), NULL, const_data_ptr_cast(key->data()), iv)) {
if (1 != EVP_DecryptInit_ex(context, GetCipher(*key), NULL, const_data_ptr_cast(key->data()), iv)) {
throw InternalException("DecryptInit failed");
}
}

size_t AESGCMStateSSL::Process(const_data_ptr_t in, idx_t in_len, data_ptr_t out, idx_t out_len) {
size_t AESStateSSL::Process(const_data_ptr_t in, idx_t in_len, data_ptr_t out, idx_t out_len) {

switch (mode) {
case ENCRYPT:
if (1 != EVP_EncryptUpdate(gcm_context, data_ptr_cast(out), reinterpret_cast<int *>(&out_len),
if (1 != EVP_EncryptUpdate(context, data_ptr_cast(out), reinterpret_cast<int *>(&out_len),
const_data_ptr_cast(in), (int)in_len)) {
throw InternalException("EncryptUpdate failed");
}
break;

case DECRYPT:
if (1 != EVP_DecryptUpdate(gcm_context, data_ptr_cast(out), reinterpret_cast<int *>(&out_len),
if (1 != EVP_DecryptUpdate(context, data_ptr_cast(out), reinterpret_cast<int *>(&out_len),
const_data_ptr_cast(in), (int)in_len)) {

throw InternalException("DecryptUpdate failed");
Expand All @@ -107,30 +120,30 @@ size_t AESGCMStateSSL::Process(const_data_ptr_t in, idx_t in_len, data_ptr_t out
return out_len;
}

size_t AESGCMStateSSL::Finalize(data_ptr_t out, idx_t out_len, data_ptr_t tag, idx_t tag_len) {
size_t AESStateSSL::FinalizeGCM(data_ptr_t out, idx_t out_len, data_ptr_t tag, idx_t tag_len) {
auto text_len = out_len;

switch (mode) {
case ENCRYPT:
{
if (1 != EVP_EncryptFinal_ex(gcm_context, data_ptr_cast(out) + out_len, reinterpret_cast<int *>(&out_len))) {
case ENCRYPT: {
if (1 != EVP_EncryptFinal_ex(context, data_ptr_cast(out) + out_len, reinterpret_cast<int *>(&out_len))) {
throw InternalException("EncryptFinal failed");
}
text_len += out_len;

// The computed tag is written at the end of a chunk
if (1 != EVP_CIPHER_CTX_ctrl(gcm_context, EVP_CTRL_GCM_GET_TAG, tag_len, tag)) {
if (1 != EVP_CIPHER_CTX_ctrl(context, EVP_CTRL_GCM_GET_TAG, tag_len, tag)) {
throw InternalException("Calculating the tag failed");
}
return text_len;
}
case DECRYPT:
{
case DECRYPT: {
// Set expected tag value
if (!EVP_CIPHER_CTX_ctrl(gcm_context, EVP_CTRL_GCM_SET_TAG, tag_len, tag)) {
if (!EVP_CIPHER_CTX_ctrl(context, EVP_CTRL_GCM_SET_TAG, tag_len, tag)) {
throw InternalException("Finalizing tag failed");
}

// EVP_DecryptFinal() will return an error code if final block is not correctly formatted.
int ret = EVP_DecryptFinal_ex(gcm_context, data_ptr_cast(out) + out_len, reinterpret_cast<int *>(&out_len));
int ret = EVP_DecryptFinal_ex(context, data_ptr_cast(out) + out_len, reinterpret_cast<int *>(&out_len));
text_len += out_len;

if (ret > 0) {
Expand All @@ -144,12 +157,46 @@ size_t AESGCMStateSSL::Finalize(data_ptr_t out, idx_t out_len, data_ptr_t tag, i
}
}

//! Finishes the current encryption/decryption operation and returns the total number of bytes in `out`.
//!
//! When `cipher` is GCM the call is forwarded unchanged to FinalizeGCM. For any other cipher `tag` and
//! `tag_len` are not used by this function.
//!
//! \param out     Output buffer; the final bytes are written starting at offset `out_len`.
//! \param out_len Number of bytes already present in `out`.
//! \param tag     Tag buffer, only forwarded to FinalizeGCM.
//! \param tag_len Size of `tag`, only forwarded to FinalizeGCM.
//! \return `out_len` plus the number of bytes reported by the final EVP call.
//! \throws InternalException if EVP_EncryptFinal_ex fails or `mode` is neither ENCRYPT nor DECRYPT.
//! \throws InvalidInputException if EVP_DecryptFinal_ex does not report success.
size_t AESStateSSL::Finalize(data_ptr_t out, idx_t out_len, data_ptr_t tag, idx_t tag_len) {
	if (cipher == GCM) {
		return FinalizeGCM(out, out_len, tag, tag_len);
	}

	// OpenSSL reports the number of bytes written through an `int *`. Hand it a real int instead of the
	// reinterpreted address of `out_len`: punning an idx_t as int only produces the right count when the
	// int happens to overlay the low bytes of an otherwise-zero wider value (assumes idx_t is wider than
	// int, as the original cast suggests - confirm).
	int final_len = 0;
	auto final_block = data_ptr_cast(out) + out_len;

	switch (mode) {
	case ENCRYPT: {
		if (1 != EVP_EncryptFinal_ex(context, final_block, &final_len)) {
			throw InternalException("EncryptFinal failed");
		}
		return out_len + static_cast<idx_t>(final_len);
	}

	case DECRYPT: {
		// EVP_DecryptFinal() will return an error code if final block is not correctly formatted.
		if (EVP_DecryptFinal_ex(context, final_block, &final_len) > 0) {
			// success
			return out_len + static_cast<idx_t>(final_len);
		}
		// NOTE(review): this path is only reached for non-GCM ciphers, yet the message talks about an AES
		// tag. The text is kept byte-identical because callers or tests may match on it - confirm whether a
		// cipher-neutral message is wanted.
		throw InvalidInputException("Computed AES tag differs from read AES tag, are you using the right key?");
	}

	default:
		throw InternalException("Unhandled encryption mode %d", static_cast<int>(mode));
	}
}

} // namespace duckdb

extern "C" {

// Call the member function through the factory object
DUCKDB_EXTENSION_API AESGCMStateSSLFactory *CreateSSLFactory() {
return new AESGCMStateSSLFactory();
DUCKDB_EXTENSION_API AESStateSSLFactory *CreateSSLFactory() {
return new AESStateSSLFactory();
};
}
Loading
Loading