From 3251e7c5c90b4fbbe16a371e5679706e3f8beac9 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 2 Jul 2025 14:25:43 +0200 Subject: [PATCH 1/7] Override http_util only if not already named 'WasmHTTPUtils' --- extension/httpfs/httpfs_extension.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/extension/httpfs/httpfs_extension.cpp b/extension/httpfs/httpfs_extension.cpp index c9bc9853..aab275b5 100644 --- a/extension/httpfs/httpfs_extension.cpp +++ b/extension/httpfs/httpfs_extension.cpp @@ -61,7 +61,12 @@ static void LoadInternal(DatabaseInstance &instance) { // HuggingFace options config.AddExtensionOption("hf_max_per_page", "Debug option to limit number of items returned in list requests", LogicalType::UBIGINT, Value::UBIGINT(0)); - config.http_util = make_shared_ptr(); + + if (config.http_util && config.http_util->GetName() == "WasmHTTPUtils") { + // Already handled, do not override + } else { + config.http_util = make_shared_ptr(); + } auto provider = make_uniq(config); provider->SetAll(); From 454849554de81dcf6ef8cda780205cb0d504d4da Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 2 Jul 2025 15:44:20 +0200 Subject: [PATCH 2/7] Extract some code to hash_functions.cpp/hpp --- CMakeLists.txt | 2 ++ extension/httpfs/crypto.cpp | 23 +---------------- extension/httpfs/hash_functions.cpp | 28 +++++++++++++++++++++ extension/httpfs/include/hash_functions.hpp | 18 +++++++++++++ 4 files changed, 49 insertions(+), 22 deletions(-) create mode 100644 extension/httpfs/hash_functions.cpp create mode 100644 extension/httpfs/include/hash_functions.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 92d45479..40b0d3d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -17,6 +17,7 @@ build_static_extension( extension/httpfs/httpfs_client.cpp extension/httpfs/http_state.cpp extension/httpfs/crypto.cpp + extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp extension/httpfs/httpfs_extension.cpp) @@ -30,6 +31,7 
@@ build_loadable_extension( extension/httpfs/httpfs_client.cpp extension/httpfs/http_state.cpp extension/httpfs/crypto.cpp + extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp extension/httpfs/httpfs_extension.cpp) diff --git a/extension/httpfs/crypto.cpp b/extension/httpfs/crypto.cpp index 04bd795e..3a89ca5f 100644 --- a/extension/httpfs/crypto.cpp +++ b/extension/httpfs/crypto.cpp @@ -1,4 +1,5 @@ #include "crypto.hpp" +#include "hash_functions.hpp" #include "mbedtls_wrapper.hpp" #include #include "duckdb/common/common.hpp" @@ -9,28 +10,6 @@ namespace duckdb { -void sha256(const char *in, size_t in_len, hash_bytes &out) { - duckdb_mbedtls::MbedTlsWrapper::ComputeSha256Hash(in, in_len, (char *)out); -} - -void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out) { - duckdb_mbedtls::MbedTlsWrapper::Hmac256(secret, secret_len, message.data(), message.size(), (char *)out); -} - -void hmac256(std::string message, hash_bytes secret, hash_bytes &out) { - hmac256(message, (char *)secret, sizeof(hash_bytes), out); -} - -void hex256(hash_bytes &in, hash_str &out) { - const char *hex = "0123456789abcdef"; - unsigned char *pin = in; - unsigned char *pout = out; - for (; pin < in + sizeof(in); pout += 2, pin++) { - pout[0] = hex[(*pin >> 4) & 0xF]; - pout[1] = hex[*pin & 0xF]; - } -} - AESStateSSL::AESStateSSL(const std::string *key) : context(EVP_CIPHER_CTX_new()) { if (!(context)) { throw InternalException("AES GCM failed with initializing context"); diff --git a/extension/httpfs/hash_functions.cpp b/extension/httpfs/hash_functions.cpp new file mode 100644 index 00000000..1e6bb8f1 --- /dev/null +++ b/extension/httpfs/hash_functions.cpp @@ -0,0 +1,28 @@ +#include "mbedtls_wrapper.hpp" +#include "hash_functions.hpp" + +namespace duckdb { + +void sha256(const char *in, size_t in_len, hash_bytes &out) { + duckdb_mbedtls::MbedTlsWrapper::ComputeSha256Hash(in, in_len, (char *)out); +} + +void hmac256(const 
std::string &message, const char *secret, size_t secret_len, hash_bytes &out) { + duckdb_mbedtls::MbedTlsWrapper::Hmac256(secret, secret_len, message.data(), message.size(), (char *)out); +} + +void hmac256(std::string message, hash_bytes secret, hash_bytes &out) { + hmac256(message, (char *)secret, sizeof(hash_bytes), out); +} + +void hex256(hash_bytes &in, hash_str &out) { + const char *hex = "0123456789abcdef"; + unsigned char *pin = in; + unsigned char *pout = out; + for (; pin < in + sizeof(in); pout += 2, pin++) { + pout[0] = hex[(*pin >> 4) & 0xF]; + pout[1] = hex[*pin & 0xF]; + } +} + +} // namespace duckdb diff --git a/extension/httpfs/include/hash_functions.hpp b/extension/httpfs/include/hash_functions.hpp new file mode 100644 index 00000000..bfefe79e --- /dev/null +++ b/extension/httpfs/include/hash_functions.hpp @@ -0,0 +1,18 @@ +#pragma once + +#include "duckdb/common/helper.hpp" + +namespace duckdb { + +typedef unsigned char hash_bytes[32]; +typedef unsigned char hash_str[64]; + +void sha256(const char *in, size_t in_len, hash_bytes &out); + +void hmac256(const std::string &message, const char *secret, size_t secret_len, hash_bytes &out); + +void hmac256(std::string message, hash_bytes secret, hash_bytes &out); + +void hex256(hash_bytes &in, hash_str &out); + +} // namespace duckdb From ae7230f3f087a8783b978e2894712c00a13cfeac Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 2 Jul 2025 15:50:41 +0200 Subject: [PATCH 3/7] Feature select: no crypto.cpp and no override of encryption_utils in Wasm --- CMakeLists.txt | 11 +++++++++-- extension/httpfs/httpfs_extension.cpp | 4 ++++ 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 40b0d3d7..5c68ea0c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -9,6 +9,11 @@ add_extension_definitions() include_directories(extension/httpfs/include ${DUCKDB_MODULE_BASE_DIR}/third_party/httplib) +if (NOT EMSCRIPTEN) + set(EXTRA_SOURCES
"extension/httpfs/crypto.cpp") + add_definitions(-DOVERRIDE_ENCRYPTION_UTILS=1) +endif() + build_static_extension( httpfs extension/httpfs/hffs.cpp @@ -19,7 +24,8 @@ build_static_extension( extension/httpfs/crypto.cpp extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp - extension/httpfs/httpfs_extension.cpp) + extension/httpfs/httpfs_extension.cpp + ${EXTRA_SOURCES}) set(PARAMETERS "-warnings") build_loadable_extension( @@ -33,7 +39,8 @@ build_loadable_extension( extension/httpfs/crypto.cpp extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp - extension/httpfs/httpfs_extension.cpp) + extension/httpfs/httpfs_extension.cpp + ${EXTRA_SOURCES}) if(MINGW) set(OPENSSL_USE_STATIC_LIBS TRUE) diff --git a/extension/httpfs/httpfs_extension.cpp b/extension/httpfs/httpfs_extension.cpp index aab275b5..945cdd48 100644 --- a/extension/httpfs/httpfs_extension.cpp +++ b/extension/httpfs/httpfs_extension.cpp @@ -6,7 +6,9 @@ #include "duckdb.hpp" #include "s3fs.hpp" #include "hffs.hpp" +#ifdef OVERRIDE_ENCRYPTION_UTILS #include "crypto.hpp" +#endif // OVERRIDE_ENCRYPTION_UTILS namespace duckdb { @@ -74,8 +76,10 @@ static void LoadInternal(DatabaseInstance &instance) { CreateS3SecretFunctions::Register(instance); CreateBearerTokenFunctions::Register(instance); +#ifdef OVERRIDE_ENCRYPTION_UTILS // set pointer to OpenSSL encryption state config.encryption_util = make_shared_ptr(); +#endif // OVERRIDE_ENCRYPTION_UTILS } void HttpfsExtension::Load(DuckDB &db) { LoadInternal(*db.instance); From bb583db996a23a903baea1d8dd67bcf0ffb2424b Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 18 Jun 2025 14:41:08 +0200 Subject: [PATCH 4/7] Bundle-in mbedtls --- extension_config.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/extension_config.cmake b/extension_config.cmake index 58810439..2664bc2a 100644 --- a/extension_config.cmake +++ b/extension_config.cmake @@ -15,4 +15,5 @@ duckdb_extension_load(httpfs SOURCE_DIR 
${CMAKE_CURRENT_LIST_DIR} INCLUDE_DIR ${CMAKE_CURRENT_LIST_DIR}/extension/httpfs/include ${LOAD_HTTPFS_TESTS} + LINKED_LIBS "../../third_party/mbedtls/libduckdb_mbedtls.a" ) From 9bf98eff513037410650c7677a2707565e1de2d4 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Wed, 2 Jul 2025 15:55:39 +0200 Subject: [PATCH 5/7] Re-enable wasm compilation --- .github/workflows/MainDistributionPipeline.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/MainDistributionPipeline.yml b/.github/workflows/MainDistributionPipeline.yml index b06d2135..496d87c0 100644 --- a/.github/workflows/MainDistributionPipeline.yml +++ b/.github/workflows/MainDistributionPipeline.yml @@ -19,7 +19,6 @@ jobs: extension_name: httpfs duckdb_version: v1.3-ossivalis ci_tools_version: main - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads' duckdb-stable-deploy: @@ -32,4 +31,3 @@ jobs: duckdb_version: v1.3-ossivalis ci_tools_version: main deploy_latest: ${{ startsWith(github.ref, 'refs/tags/v') || github.ref == 'refs/heads/main' }} - exclude_archs: 'wasm_mvp;wasm_eh;wasm_threads' From 104bec6ca79cb02719940c43c7d27bce2c7d0b29 Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Thu, 3 Jul 2025 09:14:34 +0200 Subject: [PATCH 6/7] Skip conditionally also httpfs_client.cpp --- CMakeLists.txt | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5c68ea0c..6791c0af 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,7 +10,7 @@ include_directories(extension/httpfs/include ${DUCKDB_MODULE_BASE_DIR}/third_party/httplib) if (NOT EMSCRIPTEN) - set(EXTRA_SOURCES "extension/httpfs/crypto.cpp") + set(EXTRA_SOURCES extension/httpfs/crypto.cpp extension/httpfs/httpfs_client.cpp) add_definitions(-DOVERRIDE_ENCRYPTION_UTILS=1) endif() @@ -19,13 +19,12 @@ build_static_extension( extension/httpfs/hffs.cpp extension/httpfs/s3fs.cpp extension/httpfs/httpfs.cpp - extension/httpfs/httpfs_client.cpp extension/httpfs/http_state.cpp 
extension/httpfs/crypto.cpp extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp extension/httpfs/httpfs_extension.cpp - ${EXTRA_SOURCES}) + ${EXTRA_SOURCES} ) set(PARAMETERS "-warnings") build_loadable_extension( @@ -34,13 +33,12 @@ build_loadable_extension( extension/httpfs/hffs.cpp extension/httpfs/s3fs.cpp extension/httpfs/httpfs.cpp - extension/httpfs/httpfs_client.cpp extension/httpfs/http_state.cpp extension/httpfs/crypto.cpp extension/httpfs/hash_functions.cpp extension/httpfs/create_secret_functions.cpp extension/httpfs/httpfs_extension.cpp - ${EXTRA_SOURCES}) + ${EXTRA_SOURCES} ) if(MINGW) set(OPENSSL_USE_STATIC_LIBS TRUE) From a2cc41149e4e164f853de0ba52c3ff6fad3e11cb Mon Sep 17 00:00:00 2001 From: Carlo Piovesan Date: Thu, 3 Jul 2025 11:16:40 +0200 Subject: [PATCH 7/7] Separate minimal HTTPFSUtil implementations into httpfs_client and httpfs_client_wasm --- CMakeLists.txt | 2 ++ extension/httpfs/httpfs.cpp | 5 +++++ extension/httpfs/httpfs_client.cpp | 4 ---- extension/httpfs/httpfs_client_wasm.cpp | 16 ++++++++++++++++ 4 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 extension/httpfs/httpfs_client_wasm.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 6791c0af..024cb72d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -12,6 +12,8 @@ include_directories(extension/httpfs/include if (NOT EMSCRIPTEN) set(EXTRA_SOURCES extension/httpfs/crypto.cpp extension/httpfs/httpfs_client.cpp) add_definitions(-DOVERRIDE_ENCRYPTION_UTILS=1) +else() + set(EXTRA_SOURCES extension/httpfs/httpfs_client_wasm.cpp) endif() build_static_extension( diff --git a/extension/httpfs/httpfs.cpp b/extension/httpfs/httpfs.cpp index 56c8b8e3..20f85a69 100644 --- a/extension/httpfs/httpfs.cpp +++ b/extension/httpfs/httpfs.cpp @@ -727,4 +727,9 @@ void HTTPFileHandle::StoreClient(unique_ptr client) { HTTPFileHandle::~HTTPFileHandle() { DUCKDB_LOG_FILE_SYSTEM_CLOSE((*this)); }; + +string HTTPFSUtil::GetName() const { + return "HTTPFS"; 
+} + } // namespace duckdb diff --git a/extension/httpfs/httpfs_client.cpp b/extension/httpfs/httpfs_client.cpp index 84eb457b..3bf5a64f 100644 --- a/extension/httpfs/httpfs_client.cpp +++ b/extension/httpfs/httpfs_client.cpp @@ -160,8 +160,4 @@ unordered_map HTTPFSUtil::ParseGetParameters(const string &text) return result; } -string HTTPFSUtil::GetName() const { - return "HTTPFS"; -} - } // namespace duckdb diff --git a/extension/httpfs/httpfs_client_wasm.cpp b/extension/httpfs/httpfs_client_wasm.cpp new file mode 100644 index 00000000..aaa22bbe --- /dev/null +++ b/extension/httpfs/httpfs_client_wasm.cpp @@ -0,0 +1,16 @@ +#include "httpfs_client.hpp" +#include "http_state.hpp" + +namespace duckdb { + +unique_ptr HTTPFSUtil::InitializeClient(HTTPParams &http_params, const string &proto_host_port) { + throw InternalException("HTTPFSUtil::InitializeClient is not expected to be called"); +} + +unordered_map HTTPFSUtil::ParseGetParameters(const string &text) { + unordered_map result; + //TODO: HTTPFSUtil::ParseGetParameters is currently not implemented + return result; +} + +} // namespace duckdb