Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion duckdb
Submodule duckdb updated 83 files
+1 −0 .github/ISSUE_TEMPLATE/bug_report.yml
+9 −0 .github/config/out_of_tree_extensions.cmake
+210 −0 .github/patches/extensions/httpfs/http_logger.patch
+16 −3 CMakeLists.txt
+7 −0 Makefile
+5 −0 src/catalog/default/default_table_functions.cpp
+18 −0 src/common/enum_util.cpp
+22 −0 src/common/file_system.cpp
+53 −23 src/common/local_file_system.cpp
+4 −0 src/common/virtual_file_system.cpp
+30 −14 src/execution/join_hashtable.cpp
+7 −5 src/execution/operator/csv_scanner/buffer_manager/csv_file_handle.cpp
+21 −1 src/execution/operator/csv_scanner/encode/csv_encoder.cpp
+1 −3 src/execution/operator/persistent/physical_copy_to_file.cpp
+5 −0 src/execution/physical_operator.cpp
+1 −0 src/function/function_list.cpp
+23 −3 src/function/pragma/pragma_functions.cpp
+2 −1 src/function/scalar/system/CMakeLists.txt
+8 −0 src/function/scalar/system/functions.json
+80 −0 src/function/scalar/system/parse_log_message.cpp
+2 −2 src/function/scalar/system/write_log.cpp
+1 −4 src/function/table/read_csv.cpp
+8 −0 src/include/duckdb/common/enum_util.hpp
+3 −0 src/include/duckdb/common/enums/operator_result_type.hpp
+8 −0 src/include/duckdb/common/file_system.hpp
+5 −2 src/include/duckdb/common/http_util.hpp
+1 −1 src/include/duckdb/common/multi_file/multi_file_reader.hpp
+6 −0 src/include/duckdb/common/opener_file_system.hpp
+1 −0 src/include/duckdb/common/virtual_file_system.hpp
+3 −3 src/include/duckdb/execution/operator/csv_scanner/csv_file_handle.hpp
+1 −1 src/include/duckdb/execution/operator/csv_scanner/encode/csv_encoder.hpp
+6 −0 src/include/duckdb/execution/physical_operator.hpp
+5 −0 src/include/duckdb/execution/physical_operator_states.hpp
+18 −10 src/include/duckdb/function/encoding_function.hpp
+10 −0 src/include/duckdb/function/scalar/system_functions.hpp
+32 −0 src/include/duckdb/logging/file_system_logger.hpp
+0 −83 src/include/duckdb/logging/http_logger.hpp
+12 −1 src/include/duckdb/logging/log_manager.hpp
+91 −0 src/include/duckdb/logging/log_type.hpp
+24 −25 src/include/duckdb/logging/logger.hpp
+4 −4 src/include/duckdb/main/client_config.hpp
+1 −1 src/include/duckdb/main/client_context.hpp
+2 −5 src/include/duckdb/main/client_data.hpp
+1 −0 src/include/duckdb/main/config.hpp
+0 −2 src/include/duckdb/main/extension_helper.hpp
+1 −0 src/include/duckdb/parallel/pipeline.hpp
+8 −2 src/logging/CMakeLists.txt
+64 −0 src/logging/log_manager.cpp
+93 −0 src/logging/log_types.cpp
+5 −0 src/logging/logger.cpp
+2 −1 src/main/client_context.cpp
+0 −2 src/main/client_data.cpp
+1 −1 src/main/database.cpp
+3 −2 src/main/extension/extension_helper.cpp
+25 −31 src/main/extension/extension_install.cpp
+16 −10 src/main/http/http_util.cpp
+2 −6 src/main/secret/secret_storage.cpp
+7 −1 src/optimizer/topn_optimizer.cpp
+4 −0 src/parallel/pipeline.cpp
+16 −1 src/parallel/pipeline_finish_event.cpp
+2 −6 src/storage/storage_manager.cpp
+0 −1 src/storage/table/column_checkpoint_state.cpp
+1 −1 src/storage/table/column_data_checkpointer.cpp
+1 −1 src/storage/write_ahead_log.cpp
+1 −0 test/api/test_reset.cpp
+28 −0 test/extension/autoloading_encodings.test
+1 −3 test/helpers/test_helpers.cpp
+7 −7 test/logging/test_logging.cpp
+43 −0 test/sql/copy/s3/http_log.test
+35 −0 test/sql/join/inner/test_join_with_nulls.test_slow
+58 −0 test/sql/logging/file_system_logging.test
+38 −0 test/sql/logging/file_system_logging_attach.test
+47 −0 test/sql/logging/http_logging.test
+4 −4 test/sql/logging/logging.test
+1 −1 test/sql/logging/logging_context_ids.test
+44 −0 test/sql/logging/logging_types.test
+3 −1 test/sql/logging/test_logging_function_large.test_slow
+4 −0 test/sql/storage/compression/dict_fsst/dictionary_covers_validity.test
+2 −2 test/sql/storage/compression/roaring/roaring_analyze_array.test
+2 −2 test/sql/storage/compression/roaring/roaring_analyze_bitset.test
+2 −2 test/sql/storage/compression/roaring/roaring_analyze_run.test
+4 −4 test/sql/storage/compression/zstd/zstd_compression_ratio.test_slow
+1 −12 tools/shell/tests/test_http_logging.py
2 changes: 1 addition & 1 deletion extension/httpfs/create_secret_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,7 @@ bool CreateS3SecretFunctions::TryRefreshS3Secret(ClientContext &context, const S
try {
auto res = secret_manager.CreateSecret(context, refresh_input);
auto &new_secret = dynamic_cast<const KeyValueSecret &>(*res->secret);
DUCKDB_LOG_INFO(context, "httpfs.SecretRefresh", "Successfully refreshed secret: %s, new key_id: %s",
DUCKDB_LOG_INFO(context, "Successfully refreshed secret: %s, new key_id: %s",
secret_to_refresh.secret->GetName(), new_secret.TryGetValue("key_id").ToString());
return true;
} catch (std::exception &ex) {
Expand Down
29 changes: 22 additions & 7 deletions extension/httpfs/httpfs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
#include "duckdb/common/thread.hpp"
#include "duckdb/common/types/hash.hpp"
#include "duckdb/function/scalar/strftime_format.hpp"
#include "duckdb/logging/http_logger.hpp"
#include "duckdb/logging/file_system_logger.hpp"
#include "duckdb/main/client_context.hpp"
#include "duckdb/main/database.hpp"
#include "duckdb/main/secret/secret_manager.hpp"
Expand All @@ -32,7 +32,8 @@ shared_ptr<HTTPUtil> HTTPFSUtil::GetHTTPUtil(optional_ptr<FileOpener> opener) {
return make_shared_ptr<HTTPFSUtil>();
}

unique_ptr<HTTPParams> HTTPFSUtil::InitializeParameters(optional_ptr<FileOpener> opener, optional_ptr<FileOpenerInfo> info) {
unique_ptr<HTTPParams> HTTPFSUtil::InitializeParameters(optional_ptr<FileOpener> opener,
optional_ptr<FileOpenerInfo> info) {
auto result = make_uniq<HTTPFSParams>(*this);
result->Initialize(opener);

Expand Down Expand Up @@ -277,8 +278,8 @@ void TimestampToTimeT(timestamp_t timestamp, time_t &result) {

HTTPFileHandle::HTTPFileHandle(FileSystem &fs, const OpenFileInfo &file, FileOpenFlags flags,
unique_ptr<HTTPParams> params_p)
: FileHandle(fs, file.path, flags), params(std::move(params_p)), http_params(params->Cast<HTTPFSParams>()), flags(flags), length(0),
buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) {
: FileHandle(fs, file.path, flags), params(std::move(params_p)), http_params(params->Cast<HTTPFSParams>()),
flags(flags), length(0), buffer_available(0), buffer_idx(0), file_offset(0), buffer_start(0), buffer_end(0) {
// check if the handle has extended properties that can be set directly in the handle
// if we have these properties we don't need to do a head request to obtain them later
if (file.extended_info) {
Expand Down Expand Up @@ -342,6 +343,9 @@ unique_ptr<FileHandle> HTTPFileSystem::OpenFileExtended(const OpenFileInfo &file

auto handle = CreateHandle(file, flags, opener);
handle->Initialize(opener);

DUCKDB_LOG_FILE_SYSTEM_OPEN((*handle));

return std::move(handle);
}

Expand All @@ -356,6 +360,8 @@ void HTTPFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, id
throw InternalException("Cached file not initialized properly");
}
memcpy(buffer, hfh.cached_file_handle->GetData() + location, nr_bytes);
DUCKDB_LOG_FILE_SYSTEM_READ(handle, nr_bytes, location);
hfh.file_offset = location + nr_bytes;
return;
}

Expand All @@ -366,17 +372,19 @@ void HTTPFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, id
bool skip_buffer = hfh.flags.DirectIO() || hfh.flags.RequireParallelAccess();
if (skip_buffer && to_read > 0) {
GetRangeRequest(hfh, hfh.path, {}, location, (char *)buffer, to_read);

DUCKDB_LOG_FILE_SYSTEM_READ(handle, nr_bytes, location);
// Update handle status within critical section for parallel access.
if (hfh.flags.RequireParallelAccess()) {
std::lock_guard<std::mutex> lck(hfh.mu);
hfh.buffer_available = 0;
hfh.buffer_idx = 0;
hfh.file_offset = location + nr_bytes;
return;
}

hfh.buffer_available = 0;
hfh.buffer_idx = 0;
hfh.file_offset = location + nr_bytes;
return;
}

Expand Down Expand Up @@ -423,14 +431,15 @@ void HTTPFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes, id
}
}
}
hfh.file_offset = location + nr_bytes;
DUCKDB_LOG_FILE_SYSTEM_READ(handle, nr_bytes, location);
}

int64_t HTTPFileSystem::Read(FileHandle &handle, void *buffer, int64_t nr_bytes) {
auto &hfh = handle.Cast<HTTPFileHandle>();
idx_t max_read = hfh.length - hfh.file_offset;
nr_bytes = MinValue<idx_t>(max_read, nr_bytes);
Read(handle, buffer, nr_bytes, hfh.file_offset);
hfh.file_offset += nr_bytes;
return nr_bytes;
}

Expand Down Expand Up @@ -642,6 +651,10 @@ void HTTPFileHandle::Initialize(optional_ptr<FileOpener> opener) {
http_params.state = make_shared_ptr<HTTPState>();
}

if (opener) {
TryAddLogger(*opener);
}

auto current_cache = TryGetMetadataCache(opener, hfs);

bool should_write_cache = false;
Expand Down Expand Up @@ -711,5 +724,7 @@ void HTTPFileHandle::StoreClient(unique_ptr<HTTPClient> client) {
client_cache.StoreClient(std::move(client));
}

HTTPFileHandle::~HTTPFileHandle() = default;
HTTPFileHandle::~HTTPFileHandle() {
DUCKDB_LOG_FILE_SYSTEM_CLOSE((*this));
};
} // namespace duckdb
7 changes: 0 additions & 7 deletions extension/httpfs/httpfs_client.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#include "httpfs_client.hpp"
#include "http_state.hpp"
#include "duckdb/logging/http_logger.hpp"

#define CPPHTTPLIB_OPENSSL_SUPPORT
#include "httplib.hpp"
Expand All @@ -21,9 +20,6 @@ class HTTPFSClient : public HTTPClient {
client->set_read_timeout(http_params.timeout, http_params.timeout_usec);
client->set_connection_timeout(http_params.timeout, http_params.timeout_usec);
client->set_decompress(false);
if (http_params.logger) {
SetLogger(*http_params.logger);
}
if (!http_params.bearer_token.empty()) {
client->set_bearer_token_auth(http_params.bearer_token.c_str());
}
Expand All @@ -38,9 +34,6 @@ class HTTPFSClient : public HTTPClient {
state = http_params.state;
}

void SetLogger(HTTPLogger &logger) {
client->set_logger(logger.GetLogger<duckdb_httplib_openssl::Request, duckdb_httplib_openssl::Response>());
}
unique_ptr<HTTPResponse> Get(GetRequestInfo &info) override {
if (state) {
state->get_count++;
Expand Down
6 changes: 4 additions & 2 deletions extension/httpfs/include/httpfs_client.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@ struct FileOpenerInfo;
class HTTPState;

struct HTTPFSParams : public HTTPParams {
HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) {}
HTTPFSParams(HTTPUtil &http_util) : HTTPParams(http_util) {
}

static constexpr bool DEFAULT_ENABLE_SERVER_CERT_VERIFICATION = false;
static constexpr uint64_t DEFAULT_HF_MAX_PER_PAGE = 0;
Expand All @@ -25,7 +26,8 @@ struct HTTPFSParams : public HTTPParams {

class HTTPFSUtil : public HTTPUtil {
public:
unique_ptr<HTTPParams> InitializeParameters(optional_ptr<FileOpener> opener, optional_ptr<FileOpenerInfo> info) override;
unique_ptr<HTTPParams> InitializeParameters(optional_ptr<FileOpener> opener,
optional_ptr<FileOpenerInfo> info) override;
unique_ptr<HTTPClient> InitializeClient(HTTPParams &http_params, const string &proto_host_port) override;

static unordered_map<string, string> ParseGetParameters(const string &text);
Expand Down
8 changes: 6 additions & 2 deletions extension/httpfs/s3fs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
#include "duckdb.hpp"
#ifndef DUCKDB_AMALGAMATION
#include "duckdb/common/exception/http_exception.hpp"
#include "duckdb/logging/log_type.hpp"
#include "duckdb/logging/file_system_logger.hpp"
#include "duckdb/common/helper.hpp"
#include "duckdb/common/thread.hpp"
#include "duckdb/common/types/timestamp.hpp"
Expand Down Expand Up @@ -838,6 +840,8 @@ void S3FileSystem::Write(FileHandle &handle, void *buffer, int64_t nr_bytes, idx
s3fh.file_offset += bytes_to_write;
bytes_written += bytes_to_write;
}

DUCKDB_LOG_FILE_SYSTEM_WRITE(handle, bytes_written, s3fh.file_offset - bytes_written);
}

static bool Match(vector<string>::const_iterator key, vector<string>::const_iterator key_end,
Expand Down Expand Up @@ -918,8 +922,8 @@ vector<OpenFileInfo> S3FileSystem::Glob(const string &glob_pattern, FileOpener *
string common_prefix_continuation_token;
do {
auto prefix_res =
AWSListObjectV2::Request(prefix_path, *http_params, s3_auth_params, common_prefix_continuation_token,
HTTPState::TryGetState(opener).get());
AWSListObjectV2::Request(prefix_path, *http_params, s3_auth_params,
common_prefix_continuation_token, HTTPState::TryGetState(opener).get());
AWSListObjectV2::ParseFileList(prefix_res, s3_keys);
auto more_prefixes = AWSListObjectV2::ParseCommonPrefix(prefix_res);
common_prefixes.insert(common_prefixes.end(), more_prefixes.begin(), more_prefixes.end());
Expand Down
6 changes: 3 additions & 3 deletions test/sql/secret/secret_refresh.test
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ statement ok
FROM "s3://test-bucket/test-file.parquet"

query I
SELECT message[0:46] FROM duckdb_logs WHERE type='httpfs.SecretRefresh'
SELECT message[0:46] FROM duckdb_logs WHERE message like '%Successfully refreshed secret%'
----
Successfully refreshed secret: s1, new key_id:

Expand All @@ -84,7 +84,7 @@ FROM "s3://test-bucket/test-file.parquet"
HTTP 403

query I
SELECT message[0:46] FROM duckdb_logs WHERE type='httpfs.SecretRefresh'
SELECT message[0:46] FROM duckdb_logs WHERE message like '%Successfully refreshed secret%'
----
Successfully refreshed secret: s1, new key_id:

Expand Down Expand Up @@ -125,5 +125,5 @@ HTTP 403

# -> log empty
query II
SELECT log_level, message FROM duckdb_logs WHERE type='httpfs.SecretRefresh'
SELECT log_level, message FROM duckdb_logs WHERE message like '%Successfully refreshed secret%'
----
2 changes: 1 addition & 1 deletion test/sql/secret/secret_refresh_attach.test
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,6 @@ ATTACH 's3://test-bucket/presigned/attach.db' AS db (READONLY 1);

# Secret refresh has been triggered
query II
SELECT log_level, message FROM duckdb_logs WHERE type='httpfs.SecretRefresh'
SELECT log_level, message FROM duckdb_logs WHERE message like '%Successfully refreshed secret%'
----
INFO Successfully refreshed secret: uhuh_this_mah_sh, new key_id: all the girls
Loading