diff --git a/Cargo.toml b/Cargo.toml index 6d009a469..769b90ad4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,11 +1,17 @@ [workspace] -default-members = ["bin/bucketd"] +default-members = ["crates/embucketd"] members = [ - "bin/bucketd", - "crates/metastore", - "crates/history", - "crates/runtime", - "crates/utils", + "crates/embucketd", + "crates/api-iceberg-rest", + "crates/api-internal-rest", + "crates/api-snowflake-rest", + "crates/api-ui", + "crates/df-builtins", + "crates/df-catalog", + "crates/core-executor", + "crates/core-history", + "crates/core-metastore", + "crates/core-utils", "crates/api-sessions", ] resolver = "2" package.license-file = "LICENSE" @@ -17,15 +23,32 @@ debug = true codegen-units = 16 [workspace.dependencies] +aws-config = { version = "1.5.17" } +aws-credential-types = { version = "1.2.1", features = ["hardcoded-credentials"]} + jsonwebtoken = "9.3.1" chrono = { version = "0.4.41", default-features = false, features = ["serde", "clock"] } futures = { version = "0.3" } uuid = { version = "1.10.0", features = ["v4", "serde"] } axum = { version = "0.8.1", features = ["multipart", "macros"] } axum-macros = "0.5" +dashmap = "6.1.0" +regex = "1.11" +indexmap = "2.7.1" +time = "0.3.37" +tower-sessions = { version = "0.14.0" } +url = "2.5" +tower-http = { version = "0.6.1", features = [ + "catch-panic", + "timeout", + "sensitive-headers", + "cors", + "trace", +] } tokio = { version = "1", features = ["full"] } async-trait = { version = "0.1.84" } serde = { version = "1.0", features = ["derive"] } +strum = { version = "0.26.3", features = ["derive"] } slatedb = { version = "0.6.1" } bytes = { version = "1.8.0" } snmalloc-rs = { version = "0.3" } @@ -34,6 +57,7 @@ object_store = { version = "0.12.0", features = ["aws", "gcp", "azure"] } serde_json = "1.0" serde_yaml = "0.9" tar = "0.4.44" +http = "1.2" tower = { version = "0.5", features = ["util"] } http-body-util = "0.1.0" #iceberg = { git = "https://github.com/hansetag/iceberg-rust.git", branch = "ct/builder-upstream-20241002" } @@ -48,6 +72,7 @@ snafu = { version = "0.8.5", features = ["futures"] } tracing = { version = "0.1", features = ["attributes"] } tracing-attributes = { version = "0.1.28" } embucket_history = { version = "0.1.0", path = "crates/history" } +validator = { version = "0.20.0", features = ["derive"] } datafusion = { version = "47.0.0" } datafusion-common = { version = "47.0.0" } diff --git a/bin/bucketd/src/main.rs b/bin/bucketd/src/main.rs deleted file mode 100644 index 9d747812d..000000000 --- a/bin/bucketd/src/main.rs +++ /dev/null @@ -1,83 +0,0 @@ -pub(crate) mod cli; - -use clap::Parser; -use dotenv::dotenv; -use embucket_runtime::{ - config::{AuthConfig, DbConfig, RuntimeConfig}, - http::config::WebConfig, - http::web_assets::config::StaticWebConfig, - run_binary, -}; -use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; - -#[global_allocator] -static ALLOCATOR: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; - -#[tokio::main] -#[allow(clippy::expect_used, clippy::unwrap_used, clippy::print_stdout)] -async fn main() { - dotenv().ok(); - - tracing_subscriber::registry() - .with( - tracing_subscriber::EnvFilter::try_from_default_env() - .unwrap_or_else(|_| "bucketd=debug,embucket_runtime=debug,tower_http=debug".into()), - ) - .with(tracing_subscriber::fmt::layer()) - .init(); - - let opts = cli::CliOpts::parse(); - let slatedb_prefix = opts.slatedb_prefix.clone(); - let host = opts.host.clone().unwrap(); - let iceberg_catalog_url = opts.catalog_url.clone().unwrap(); - let 
port = opts.port.unwrap(); - let web_assets_port = opts.assets_port.unwrap(); - let allow_origin = if opts.cors_enabled.unwrap_or(false) { - opts.cors_allow_origin.clone() - } else { - None - }; - let jwt_secret = opts.jwt_secret(); - let demo_user = opts.auth_demo_user.clone().unwrap(); - let demo_password = opts.auth_demo_password.clone().unwrap(); - - let dbt_serialization_format = opts - .data_format - .clone() - .unwrap_or_else(|| "json".to_string()); - let object_store = opts.object_store_backend(); - let mut auth_config = AuthConfig::new(jwt_secret); - auth_config.with_demo_credentials(demo_user, demo_password); - - match object_store { - Err(e) => { - tracing::error!("Failed to create object store: {:?}", e); - return; - } - Ok(object_store) => { - tracing::info!("Starting embucket"); - - let runtime_config = RuntimeConfig { - db: DbConfig { - slatedb_prefix: slatedb_prefix.clone(), - }, - web: WebConfig { - host: host.clone(), - port, - allow_origin: allow_origin.clone(), - data_format: dbt_serialization_format, - iceberg_catalog_url, - }, - web_assets: StaticWebConfig { - host, - port: web_assets_port, - allow_origin, - }, - }; - - if let Err(e) = run_binary(object_store, runtime_config, auth_config).await { - tracing::error!("Error while running: {:?}", e); - } - } - } -} diff --git a/crates/api-iceberg-rest/Cargo.toml b/crates/api-iceberg-rest/Cargo.toml new file mode 100644 index 000000000..b15fdca59 --- /dev/null +++ b/crates/api-iceberg-rest/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "api-iceberg-rest" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +core-metastore = { path = "../core-metastore" } +core-utils = { path = "../core-utils" } + +axum = { workspace = true } +http = { workspace = true } +iceberg-rest-catalog = { workspace = true } +iceberg-rust = { workspace = true } +iceberg-rust-spec = { workspace = true } +object_store = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +snafu = { workspace = true } +tracing = { workspace = true } +validator = { workspace = true } + + +[lints] +workspace = true diff --git a/crates/api-iceberg-rest/src/error.rs b/crates/api-iceberg-rest/src/error.rs new file mode 100644 index 000000000..1a1c4db41 --- /dev/null +++ b/crates/api-iceberg-rest/src/error.rs @@ -0,0 +1,58 @@ +use axum::{Json, response::IntoResponse}; +use core_metastore::error::MetastoreError; +use http; +use serde::{Deserialize, Serialize}; +use snafu::prelude::*; + +#[derive(Snafu, Debug)] +pub struct IcebergAPIError(pub MetastoreError); +pub type IcebergAPIResult = Result; + +#[derive(Debug, Serialize, Deserialize)] +pub struct ErrorResponse { + pub message: String, + pub status_code: u16, +} + +impl IntoResponse for IcebergAPIError { + fn into_response(self) -> axum::response::Response { + let message = (self.0.to_string(),); + let code = match self.0 { + MetastoreError::TableDataExists { .. } + | MetastoreError::ObjectAlreadyExists { .. } + | MetastoreError::VolumeAlreadyExists { .. } + | MetastoreError::DatabaseAlreadyExists { .. } + | MetastoreError::SchemaAlreadyExists { .. } + | MetastoreError::TableAlreadyExists { .. } + | MetastoreError::VolumeInUse { .. } => http::StatusCode::CONFLICT, + MetastoreError::TableRequirementFailed { .. } => http::StatusCode::UNPROCESSABLE_ENTITY, + MetastoreError::VolumeValidationFailed { .. } + | MetastoreError::VolumeMissingCredentials + | MetastoreError::Validation { .. 
} => http::StatusCode::BAD_REQUEST, + MetastoreError::CloudProviderNotImplemented { .. } => { + http::StatusCode::PRECONDITION_FAILED + } + MetastoreError::VolumeNotFound { .. } + | MetastoreError::DatabaseNotFound { .. } + | MetastoreError::SchemaNotFound { .. } + | MetastoreError::TableNotFound { .. } + | MetastoreError::ObjectNotFound => http::StatusCode::NOT_FOUND, + MetastoreError::ObjectStore { .. } + | MetastoreError::ObjectStorePath { .. } + | MetastoreError::CreateDirectory { .. } + | MetastoreError::SlateDB { .. } + | MetastoreError::UtilSlateDB { .. } + | MetastoreError::Iceberg { .. } + | MetastoreError::Serde { .. } + | MetastoreError::TableMetadataBuilder { .. } + | MetastoreError::TableObjectStoreNotFound { .. } + | MetastoreError::UrlParse { .. } => http::StatusCode::INTERNAL_SERVER_ERROR, + }; + + let error = ErrorResponse { + message: message.0, + status_code: code.as_u16(), + }; + (code, Json(error)).into_response() + } +} diff --git a/crates/runtime/src/http/catalog/handlers.rs b/crates/api-iceberg-rest/src/handlers.rs similarity index 81% rename from crates/runtime/src/http/catalog/handlers.rs rename to crates/api-iceberg-rest/src/handlers.rs index 0059960db..a14666043 100644 --- a/crates/runtime/src/http/catalog/handlers.rs +++ b/crates/api-iceberg-rest/src/handlers.rs @@ -1,14 +1,14 @@ -use crate::http::catalog::schemas::{ - from_get_schema, from_schema, from_schemas_list, from_tables_list, to_create_table, to_schema, - to_table_commit, CommitTable, GetConfigQuery, +use crate::error::{IcebergAPIError, IcebergAPIResult}; +use crate::schemas::{ + CommitTable, GetConfigQuery, from_get_schema, from_schema, from_schemas_list, from_tables_list, + to_create_table, to_schema, to_table_commit, }; -use crate::http::metastore::error::{MetastoreAPIError, MetastoreAPIResult}; -use crate::http::state::AppState; +use crate::state::State as AppState; use axum::http::StatusCode; -use axum::{extract::Path, extract::Query, extract::State, Json}; -use embucket_metastore::error::{self as metastore_error, MetastoreError}; -use embucket_metastore::{SchemaIdent as MetastoreSchemaIdent, TableIdent as MetastoreTableIdent}; -use embucket_utils::scan_iterator::ScanIterator; +use axum::{Json, extract::Path, extract::Query, extract::State}; +use core_metastore::error::{self as metastore_error, MetastoreError}; +use core_metastore::{SchemaIdent as MetastoreSchemaIdent, TableIdent as MetastoreTableIdent}; +use core_utils::scan_iterator::ScanIterator; use iceberg_rest_catalog::models::{ CatalogConfig, CommitTableResponse, CreateNamespaceRequest, CreateNamespaceResponse, CreateTableRequest, GetNamespaceResponse, ListNamespacesResponse, ListTablesResponse, @@ -16,7 +16,7 @@ use iceberg_rest_catalog::models::{ }; use iceberg_rust_spec::table_metadata::TableMetadata; use object_store::ObjectStore; -use serde_json::{from_slice, Value}; +use serde_json::{Value, from_slice}; use snafu::ResultExt; use std::collections::HashMap; use validator::Validate; @@ -26,7 +26,7 @@ pub async fn create_namespace( State(state): State, Path(database_name): Path, Json(schema): Json, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let ib_schema = to_schema(schema, database_name); let schema = state .metastore @@ -39,7 +39,7 @@ pub async fn create_namespace( pub async fn get_namespace( State(state): State, Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let schema_ident = MetastoreSchemaIdent { database: database_name.clone(), schema: 
schema_name.clone(), @@ -48,9 +48,9 @@ pub async fn get_namespace( .metastore .get_schema(&schema_ident) .await - .map_err(|e: MetastoreError| MetastoreAPIError::from(e))? + .map_err(|e: MetastoreError| IcebergAPIError::from(e))? .ok_or_else(|| { - MetastoreAPIError::from(MetastoreError::SchemaNotFound { + IcebergAPIError::from(MetastoreError::SchemaNotFound { db: database_name.clone(), schema: schema_name.clone(), }) @@ -62,13 +62,13 @@ pub async fn get_namespace( pub async fn delete_namespace( State(state): State, Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult { +) -> IcebergAPIResult { let schema_ident = MetastoreSchemaIdent::new(database_name, schema_name); state .metastore .delete_schema(&schema_ident, true) .await - .map_err(MetastoreAPIError)?; + .map_err(IcebergAPIError)?; Ok(StatusCode::NO_CONTENT) } @@ -76,13 +76,13 @@ pub async fn delete_namespace( pub async fn list_namespaces( State(state): State, Path(database_name): Path, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let schemas = state .metastore .iter_schemas(&database_name) .collect() .await - .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e }))?; + .map_err(|e| IcebergAPIError(MetastoreError::UtilSlateDB { source: e }))?; Ok(Json(from_schemas_list(schemas))) } @@ -91,7 +91,7 @@ pub async fn create_table( State(state): State, Path((database_name, schema_name)): Path<(String, String)>, Json(table): Json, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let table_ident = MetastoreTableIdent::new(&database_name, &schema_name, &table.name); let volume_ident = state .metastore @@ -107,7 +107,7 @@ pub async fn create_table( .metastore .create_table(&table_ident, ib_create_table) .await - .map_err(MetastoreAPIError)?; + .map_err(IcebergAPIError)?; Ok(Json(LoadTableResult::new(table.data.metadata))) } @@ -116,7 +116,7 @@ pub async fn register_table( State(state): State, Path((database_name, schema_name)): Path<(String, String)>, Json(register): Json, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let table_ident = MetastoreTableIdent::new(&database_name, &schema_name, ®ister.name); let metadata_raw = state .metastore @@ -146,14 +146,14 @@ pub async fn commit_table( State(state): State, Path((database_name, schema_name, table_name)): Path<(String, String, String)>, Json(commit): Json, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let table_ident = MetastoreTableIdent::new(&database_name, &schema_name, &table_name); let table_updates = to_table_commit(commit); let ib_table = state .metastore .update_table(&table_ident, table_updates) .await - .map_err(MetastoreAPIError)?; + .map_err(IcebergAPIError)?; Ok(Json(CommitTableResponse::new( ib_table.data.metadata_location, ib_table.data.metadata, @@ -164,15 +164,15 @@ pub async fn commit_table( pub async fn get_table( State(state): State, Path((database_name, schema_name, table_name)): Path<(String, String, String)>, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let table_ident = MetastoreTableIdent::new(&database_name, &schema_name, &table_name); let table = state .metastore .get_table(&table_ident) .await - .map_err(|e: MetastoreError| MetastoreAPIError::from(e))? + .map_err(|e: MetastoreError| IcebergAPIError::from(e))? 
.ok_or_else(|| { - MetastoreAPIError::from(MetastoreError::TableNotFound { + IcebergAPIError::from(MetastoreError::TableNotFound { db: database_name.clone(), schema: schema_name.clone(), table: table_name.clone(), @@ -185,13 +185,13 @@ pub async fn get_table( pub async fn delete_table( State(state): State, Path((database_name, schema_name, table_name)): Path<(String, String, String)>, -) -> MetastoreAPIResult { +) -> IcebergAPIResult { let table_ident = MetastoreTableIdent::new(&database_name, &schema_name, &table_name); state .metastore .delete_table(&table_ident, true) .await - .map_err(MetastoreAPIError)?; + .map_err(IcebergAPIError)?; Ok(StatusCode::NO_CONTENT) } @@ -199,14 +199,14 @@ pub async fn delete_table( pub async fn list_tables( State(state): State, Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let schema_ident = MetastoreSchemaIdent::new(database_name, schema_name); let tables = state .metastore .iter_tables(&schema_ident) .collect() .await - .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e }))?; + .map_err(|e| IcebergAPIError(MetastoreError::UtilSlateDB { source: e }))?; Ok(Json(from_tables_list(tables))) } @@ -215,7 +215,7 @@ pub async fn report_metrics( State(_state): State, Path((database_name, schema_name, table_name)): Path<(String, String, String)>, Json(metrics): Json, -) -> MetastoreAPIResult { +) -> IcebergAPIResult { tracing::info!( "Received metrics for table {database_name}.{schema_name}.{table_name}: {:?}", metrics @@ -227,7 +227,7 @@ pub async fn report_metrics( pub async fn get_config( State(state): State, Query(params): Query, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { let catalog_url = state.config.iceberg_catalog_url.clone(); let config = CatalogConfig { defaults: HashMap::new(), @@ -247,7 +247,7 @@ pub async fn get_config( pub async fn list_views( State(_state): State, Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult> { +) -> IcebergAPIResult> { Ok(Json(ListTablesResponse { next_page_token: None, identifiers: None, diff --git a/crates/runtime/src/http/dbt/mod.rs b/crates/api-iceberg-rest/src/lib.rs similarity index 81% rename from crates/runtime/src/http/dbt/mod.rs rename to crates/api-iceberg-rest/src/lib.rs index a219c6340..e89367a8a 100644 --- a/crates/runtime/src/http/dbt/mod.rs +++ b/crates/api-iceberg-rest/src/lib.rs @@ -2,3 +2,4 @@ pub mod error; pub mod handlers; pub mod router; pub mod schemas; +pub mod state; diff --git a/crates/runtime/src/http/catalog/router.rs b/crates/api-iceberg-rest/src/router.rs similarity index 74% rename from crates/runtime/src/http/catalog/router.rs rename to crates/api-iceberg-rest/src/router.rs index f8bf5485c..6621806d3 100644 --- a/crates/runtime/src/http/catalog/router.rs +++ b/crates/api-iceberg-rest/src/router.rs @@ -1,12 +1,15 @@ -use crate::http::state::AppState; -use axum::routing::{delete, get, post}; +use crate::state::State; use axum::Router; +use axum::routing::{delete, get, post}; -#[allow(clippy::wildcard_imports)] -use crate::http::catalog::handlers::*; +use crate::handlers::{ + commit_table, create_namespace, create_table, delete_namespace, delete_table, get_config, + get_namespace, get_table, list_namespaces, list_tables, list_views, register_table, + report_metrics, +}; -pub fn create_router() -> Router { - let table_router: Router = Router::new() +pub fn create_router() -> Router { + let table_router: Router = Router::new() .route("/", post(create_table)) 
.route("/", get(list_tables)) .route("/{table}", get(get_table)) @@ -16,7 +19,7 @@ pub fn create_router() -> Router { // only one endpoint is defined for the catalog implementation to work // we don't actually have functionality for views yet - let view_router: Router = Router::new().route("/", get(list_views)); + let view_router: Router = Router::new().route("/", get(list_views)); let ns_router = Router::new() .route("/", get(list_namespaces)) diff --git a/crates/runtime/src/http/catalog/schemas.rs b/crates/api-iceberg-rest/src/schemas.rs similarity index 99% rename from crates/runtime/src/http/catalog/schemas.rs rename to crates/api-iceberg-rest/src/schemas.rs index a86ddd820..23e854430 100644 --- a/crates/runtime/src/http/catalog/schemas.rs +++ b/crates/api-iceberg-rest/src/schemas.rs @@ -1,4 +1,4 @@ -use embucket_metastore::{ +use core_metastore::{ RwObject, Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent, Table as MetastoreTable, TableCreateRequest as MetastoreTableCreateRequest, TableFormat as MetastoreTableFormat, TableIdent as MetastoreTableIdent, diff --git a/crates/api-iceberg-rest/src/state.rs b/crates/api-iceberg-rest/src/state.rs new file mode 100644 index 000000000..fa3831a7a --- /dev/null +++ b/crates/api-iceberg-rest/src/state.rs @@ -0,0 +1,22 @@ +use core_metastore::metastore::Metastore; +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Config { + pub iceberg_catalog_url: String, +} + +#[derive(Clone)] +pub struct State { + pub metastore: Arc, + pub config: Arc, +} + +impl State { + // You can add helper methods for state initialization if needed + pub fn new(metastore: Arc, config: Arc) -> Self { + Self { metastore, config } + } +} diff --git a/crates/api-internal-rest/Cargo.toml b/crates/api-internal-rest/Cargo.toml new file mode 100644 index 000000000..66c7cf30d --- /dev/null +++ b/crates/api-internal-rest/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "api-internal-rest" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +core-metastore = { path = "../core-metastore" } +core-utils = { path = "../core-utils" } + +axum = { workspace = true } +http = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +snafu = { workspace = true } +validator = { workspace = true } +tracing = { workspace = true } + +[lints] +workspace = true diff --git a/crates/runtime/src/http/metastore/error.rs b/crates/api-internal-rest/src/error.rs similarity index 89% rename from crates/runtime/src/http/metastore/error.rs rename to crates/api-internal-rest/src/error.rs index 1bcca34d7..f055b2a17 100644 --- a/crates/runtime/src/http/metastore/error.rs +++ b/crates/api-internal-rest/src/error.rs @@ -1,12 +1,19 @@ -use crate::http::error::ErrorResponse; -use axum::{response::IntoResponse, Json}; -use embucket_metastore::error::MetastoreError; +use axum::{Json, response::IntoResponse}; +use core_metastore::error::MetastoreError; +use http; +use serde::{Deserialize, Serialize}; use snafu::prelude::*; #[derive(Snafu, Debug)] pub struct MetastoreAPIError(pub MetastoreError); pub type MetastoreAPIResult = Result; +#[derive(Debug, Serialize, Deserialize)] +pub struct ErrorResponse { + pub message: String, + pub status_code: u16, +} + impl IntoResponse for MetastoreAPIError { fn into_response(self) -> axum::response::Response { let message = (self.0.to_string(),); diff --git a/crates/api-internal-rest/src/handlers.rs 
b/crates/api-internal-rest/src/handlers.rs new file mode 100644 index 000000000..08dc125a6 --- /dev/null +++ b/crates/api-internal-rest/src/handlers.rs @@ -0,0 +1,158 @@ +use super::error::{MetastoreAPIError, MetastoreAPIResult}; +use axum::{ + Json, + extract::{Path, Query, State}, +}; +use snafu::ResultExt; + +#[allow(clippy::wildcard_imports)] +use core_metastore::{ + error::{self as metastore_error, MetastoreError}, + *, +}; + +use crate::state::State as AppState; +use core_utils::scan_iterator::ScanIterator; +use validator::Validate; + +pub type RwObjectVec = Vec>; + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct QueryParameters { + #[serde(default)] + pub cascade: Option, +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn list_volumes( + State(state): State, +) -> MetastoreAPIResult>> { + let volumes = state + .metastore + .iter_volumes() + .collect() + .await + .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e }))? + .iter() + .map(|v| hide_sensitive(v.clone())) + .collect(); + Ok(Json(volumes)) +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn get_volume( + State(state): State, + Path(volume_name): Path, +) -> MetastoreAPIResult>> { + match state.metastore.get_volume(&volume_name).await { + Ok(Some(volume)) => Ok(Json(hide_sensitive(volume))), + Ok(None) => Err(MetastoreError::VolumeNotFound { + volume: volume_name.clone(), + } + .into()), + Err(e) => Err(e.into()), + } +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn create_volume( + State(state): State, + Json(volume): Json, +) -> MetastoreAPIResult>> { + volume + .validate() + .context(metastore_error::ValidationSnafu)?; + state + .metastore + .create_volume(&volume.ident.clone(), volume) + .await + .map_err(MetastoreAPIError) + .map(|v| Json(hide_sensitive(v))) +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn update_volume( + State(state): State, + Path(volume_name): Path, + Json(volume): Json, +) -> MetastoreAPIResult>> { + volume + .validate() + .context(metastore_error::ValidationSnafu)?; + state + .metastore + .update_volume(&volume_name, volume) + .await + .map_err(MetastoreAPIError) + .map(|v| Json(hide_sensitive(v))) +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn delete_volume( + State(state): State, + Query(query): Query, + Path(volume_name): Path, +) -> MetastoreAPIResult<()> { + state + .metastore + .delete_volume(&volume_name, query.cascade.unwrap_or_default()) + .await + .map_err(MetastoreAPIError) +} + +#[allow(clippy::needless_pass_by_value)] +#[must_use] +pub fn hide_sensitive(volume: RwObject) -> RwObject { + let mut new_volume = volume; + if let VolumeType::S3(ref mut s3_volume) = new_volume.data.volume { + if let Some(AwsCredentials::AccessKey(ref mut access_key)) = s3_volume.credentials { + access_key.aws_access_key_id = "******".to_string(); + access_key.aws_secret_access_key = "******".to_string(); + } + } + new_volume +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn list_databases( + State(state): State, +) -> MetastoreAPIResult>>> { + state + .metastore + .iter_databases() + .collect() + .await + .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e })) + 
.map(Json) +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn get_database( + State(state): State, + Path(database_name): Path, +) -> MetastoreAPIResult>> { + match state.metastore.get_database(&database_name).await { + Ok(Some(db)) => Ok(Json(db)), + Ok(None) => Err(MetastoreError::DatabaseNotFound { + db: database_name.clone(), + } + .into()), + Err(e) => Err(e.into()), + } +} + +#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] +pub async fn create_database( + State(state): State, + Json(database): Json, +) -> MetastoreAPIResult>> { + database + .validate() + .context(metastore_error::ValidationSnafu)?; + state + .metastore + .create_database(&database.ident.clone(), database) + .await + .map_err(MetastoreAPIError) + .map(Json) +} diff --git a/crates/runtime/src/http/metastore/mod.rs b/crates/api-internal-rest/src/lib.rs similarity index 76% rename from crates/runtime/src/http/metastore/mod.rs rename to crates/api-internal-rest/src/lib.rs index da5aa8f55..d35ca3751 100644 --- a/crates/runtime/src/http/metastore/mod.rs +++ b/crates/api-internal-rest/src/lib.rs @@ -1,3 +1,4 @@ pub mod error; pub mod handlers; pub mod router; +pub mod state; diff --git a/crates/api-internal-rest/src/router.rs b/crates/api-internal-rest/src/router.rs new file mode 100644 index 000000000..515e120eb --- /dev/null +++ b/crates/api-internal-rest/src/router.rs @@ -0,0 +1,20 @@ +use crate::state::State; +use axum::Router; +use axum::routing::{delete, get, post, put}; + +use crate::handlers::{ + create_database, create_volume, delete_volume, get_database, get_volume, list_databases, + list_volumes, update_volume, +}; + +pub fn create_router() -> Router { + Router::new() + .route("/volumes", get(list_volumes)) + .route("/volumes", post(create_volume)) + .route("/volumes/{volumeName}", get(get_volume)) + .route("/volumes/{volumeName}", put(update_volume)) + .route("/volumes/{volumeName}", delete(delete_volume)) + .route("/databases", get(list_databases)) + .route("/databases", post(create_database)) + .route("/databases/{databaseName}", get(get_database)) +} diff --git a/crates/api-internal-rest/src/state.rs b/crates/api-internal-rest/src/state.rs new file mode 100644 index 000000000..23b6b38dd --- /dev/null +++ b/crates/api-internal-rest/src/state.rs @@ -0,0 +1,15 @@ +use core_metastore::metastore::Metastore; +use std::sync::Arc; + +// Define a State struct that contains shared services or repositories +#[derive(Clone)] +pub struct State { + pub metastore: Arc, +} + +impl State { + // You can add helper methods for state initialization if needed + pub fn new(metastore: Arc) -> Self { + Self { metastore } + } +} diff --git a/crates/api-sessions/Cargo.toml b/crates/api-sessions/Cargo.toml new file mode 100644 index 000000000..bd50c008f --- /dev/null +++ b/crates/api-sessions/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "api-sessions" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +core-executor = { path = "../core-executor" } + +async-trait = { workspace = true } +axum = { workspace = true } +tower-sessions = { workspace = true } +tokio = { workspace = true } +time = { workspace = true } +http = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +snafu = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } + +[lints] +workspace = true diff --git a/crates/api-sessions/src/lib.rs 
b/crates/api-sessions/src/lib.rs new file mode 100644 index 000000000..534fc3fb6 --- /dev/null +++ b/crates/api-sessions/src/lib.rs @@ -0,0 +1,2 @@ +pub mod session; +pub use crate::session::{DFSessionId, RequestSessionMemory, RequestSessionStore}; diff --git a/crates/runtime/src/http/session.rs b/crates/api-sessions/src/session.rs similarity index 93% rename from crates/runtime/src/http/session.rs rename to crates/api-sessions/src/session.rs index f91261dc0..3f4139807 100644 --- a/crates/runtime/src/http/session.rs +++ b/crates/api-sessions/src/session.rs @@ -1,17 +1,19 @@ -use crate::http::error::ErrorResponse; -use axum::{extract::FromRequestParts, response::IntoResponse, Json}; +use axum::{Json, extract::FromRequestParts, response::IntoResponse}; use http::request::Parts; -use snafu::prelude::*; +use serde::{Deserialize, Serialize}; use snafu::ResultExt; +use snafu::prelude::*; use std::{collections::HashMap, sync::Arc}; use time::OffsetDateTime; use tokio::sync::Mutex; use tower_sessions::{ + ExpiredDeletion, Session, SessionStore, session::{Id, Record}, - session_store, ExpiredDeletion, Session, SessionStore, + session_store, }; -use crate::execution::service::ExecutionService; +use core_executor::service::ExecutionService; +use uuid; pub type RequestSessionMemory = Arc>>; @@ -108,11 +110,7 @@ impl ExpiredDeletion for RequestSessionStore { .iter() .filter_map( |(id, Record { expiry_date, .. })| { - if *expiry_date <= now { - Some(*id) - } else { - None - } + if *expiry_date <= now { Some(*id) } else { None } }, ) .collect::>(); @@ -176,6 +174,12 @@ pub enum SessionError { }, } +#[derive(Debug, Serialize, Deserialize)] +pub struct ErrorResponse { + pub message: String, + pub status_code: u16, +} + impl IntoResponse for SessionError { fn into_response(self) -> axum::response::Response { let er = ErrorResponse { diff --git a/crates/api-snowflake-rest/Cargo.toml b/crates/api-snowflake-rest/Cargo.toml new file mode 100644 index 000000000..380b4c481 --- /dev/null +++ b/crates/api-snowflake-rest/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "api-snowflake-rest" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +api-sessions = { path = "../api-sessions" } +core-metastore = { path = "../core-metastore" } +core-executor = { path = "../core-executor" } + +axum = { workspace = true } +flate2 = "1" +regex = { workspace = true } +base64 = "0.22" +indexmap = { workspace = true } +datafusion = { workspace = true } +snafu = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } +tracing = { workspace = true } +uuid = { workspace = true } + +[lints] +workspace = true diff --git a/crates/api-snowflake-rest/src/error.rs b/crates/api-snowflake-rest/src/error.rs new file mode 100644 index 000000000..a04cb124d --- /dev/null +++ b/crates/api-snowflake-rest/src/error.rs @@ -0,0 +1,197 @@ +use axum::{Json, http, response::IntoResponse}; +use snafu::prelude::*; + +use crate::schemas::JsonResponse; +use core_executor::error::ExecutionError; +use datafusion::arrow::error::ArrowError; + +#[derive(Snafu, Debug)] +#[snafu(visibility(pub(crate)))] +pub enum DbtError { + #[snafu(display("Failed to decompress GZip body"))] + GZipDecompress { source: std::io::Error }, + + #[snafu(display("Failed to parse login request"))] + LoginRequestParse { source: serde_json::Error }, + + #[snafu(display("Failed to parse query body"))] + QueryBodyParse { source: serde_json::Error }, + + #[snafu(display("Missing auth token"))] + MissingAuthToken, + + 
#[snafu(display("Invalid warehouse_id format"))] + InvalidWarehouseIdFormat { source: uuid::Error }, + + #[snafu(display("Missing DBT session"))] + MissingDbtSession, + + #[snafu(display("Invalid auth data"))] + InvalidAuthData, + + #[snafu(display("Feature not implemented"))] + NotImplemented, + + #[snafu(display("Failed to parse row JSON"))] + RowParse { source: serde_json::Error }, + + #[snafu(display("UTF8 error: {source}"))] + Utf8 { source: std::string::FromUtf8Error }, + + #[snafu(display("Arrow error: {source}"))] + Arrow { source: ArrowError }, + + // #[snafu(transparent)] + // Metastore { + // source: core_metastore::error::MetastoreError, + // }, + #[snafu(transparent)] + Execution { source: ExecutionError }, +} + +pub type DbtResult = std::result::Result; + +impl IntoResponse for DbtError { + fn into_response(self) -> axum::response::Response { + if let Self::Execution { source } = self { + return convert_into_response(&source); + } + // if let Self::Metastore { source } = self { + // return source.into_response(); + // } + + let status_code = match &self { + Self::GZipDecompress { .. } + | Self::LoginRequestParse { .. } + | Self::QueryBodyParse { .. } + | Self::InvalidWarehouseIdFormat { .. } => http::StatusCode::BAD_REQUEST, + Self::RowParse { .. } + | Self::Utf8 { .. } + | Self::Arrow { .. } + // | Self::Metastore { .. } + | Self::Execution { .. } + | Self::NotImplemented { .. } => http::StatusCode::OK, + Self::MissingAuthToken | Self::MissingDbtSession | Self::InvalidAuthData => { + http::StatusCode::UNAUTHORIZED + } + }; + + let message = match &self { + Self::GZipDecompress { source } => format!("failed to decompress GZip body: {source}"), + Self::LoginRequestParse { source } => { + format!("failed to parse login request: {source}") + } + Self::QueryBodyParse { source } => format!("failed to parse query body: {source}"), + Self::InvalidWarehouseIdFormat { source } => format!("invalid warehouse_id: {source}"), + Self::RowParse { source } => format!("failed to parse row JSON: {source}"), + Self::MissingAuthToken | Self::MissingDbtSession | Self::InvalidAuthData => { + "session error".to_string() + } + Self::Utf8 { source } => { + format!("Error encoding UTF8 string: {source}") + } + Self::Arrow { source } => { + format!("Error encoding in Arrow format: {source}") + } + Self::NotImplemented => "feature not implemented".to_string(), + // Self::Metastore { source } => source.to_string(), + Self::Execution { source } => source.to_string(), + }; + + let body = Json(JsonResponse { + success: false, + message: Some(message), + // TODO: On error data field contains details about actual error + // {'data': {'internalError': False, 'unredactedFromSecureObject': False, 'errorCode': '002003', 'age': 0, 'sqlState': '02000', 'queryId': '01bb407f-0002-97af-0004-d66e006a69fa', 'line': 1, 'pos': 14, 'type': 'COMPILATION'}} + data: None, + code: Some(status_code.as_u16().to_string()), + }); + (status_code, body).into_response() + } +} + +fn convert_into_response(error: &ExecutionError) -> axum::response::Response { + let status_code = match error { + ExecutionError::RegisterUDF { .. } + | ExecutionError::RegisterUDAF { .. } + | ExecutionError::InvalidTableIdentifier { .. } + | ExecutionError::InvalidSchemaIdentifier { .. } + | ExecutionError::InvalidFilePath { .. } + | ExecutionError::InvalidBucketIdentifier { .. } + | ExecutionError::TableProviderNotFound { .. } + | ExecutionError::MissingDataFusionSession { .. } + | ExecutionError::Utf8 { .. } + | ExecutionError::VolumeNotFound { .. 
} + | ExecutionError::ObjectStore { .. } + | ExecutionError::ObjectAlreadyExists { .. } + | ExecutionError::UnsupportedFileFormat { .. } + | ExecutionError::RefreshCatalogList { .. } + | ExecutionError::UrlParse { .. } + | ExecutionError::JobError { .. } + | ExecutionError::UploadFailed { .. } => http::StatusCode::BAD_REQUEST, + ExecutionError::Arrow { .. } + | ExecutionError::S3Tables { .. } + | ExecutionError::Iceberg { .. } + | ExecutionError::CatalogListDowncast + | ExecutionError::CatalogDownCast { .. } + | ExecutionError::RegisterCatalog { .. } => http::StatusCode::INTERNAL_SERVER_ERROR, + ExecutionError::DatabaseNotFound { .. } + | ExecutionError::TableNotFound { .. } + | ExecutionError::SchemaNotFound { .. } + | ExecutionError::CatalogNotFound { .. } + | ExecutionError::Metastore { .. } + | ExecutionError::DataFusion { .. } + | ExecutionError::DataFusionQuery { .. } => http::StatusCode::OK, + }; + + let message = match error { + ExecutionError::DataFusion { source } => format!("DataFusion error: {source}"), + ExecutionError::DataFusionQuery { source, query } => { + format!("DataFusion error: {source}, query: {query}") + } + ExecutionError::InvalidTableIdentifier { ident } => { + format!("Invalid table identifier: {ident}") + } + ExecutionError::InvalidSchemaIdentifier { ident } => { + format!("Invalid schema identifier: {ident}") + } + ExecutionError::InvalidFilePath { path } => format!("Invalid file path: {path}"), + ExecutionError::InvalidBucketIdentifier { ident } => { + format!("Invalid bucket identifier: {ident}") + } + ExecutionError::Arrow { source } => format!("Arrow error: {source}"), + ExecutionError::TableProviderNotFound { table_name } => { + format!("No Table Provider found for table: {table_name}") + } + ExecutionError::MissingDataFusionSession { id } => { + format!("Missing DataFusion session for id: {id}") + } + ExecutionError::Utf8 { source } => format!("Error encoding UTF8 string: {source}"), + ExecutionError::Metastore { source } => format!("Metastore error: {source}"), + ExecutionError::DatabaseNotFound { db } => format!("Database not found: {db}"), + ExecutionError::TableNotFound { table } => format!("Table not found: {table}"), + ExecutionError::SchemaNotFound { schema } => format!("Schema not found: {schema}"), + ExecutionError::VolumeNotFound { volume } => format!("Volume not found: {volume}"), + ExecutionError::ObjectStore { source } => format!("Object store error: {source}"), + ExecutionError::ObjectAlreadyExists { type_name, name } => { + format!("Object of type {type_name} with name {name} already exists") + } + ExecutionError::UnsupportedFileFormat { format } => { + format!("Unsupported file format {format}") + } + ExecutionError::RefreshCatalogList { source } => { + format!("Refresh catalog list error: {source}") + } + _ => "Internal server error".to_string(), + }; + + let body = Json(JsonResponse { + success: false, + message: Some(message), + // TODO: On error data field contains details about actual error + // {'data': {'internalError': False, 'unredactedFromSecureObject': False, 'errorCode': '002003', 'age': 0, 'sqlState': '02000', 'queryId': '01bb407f-0002-97af-0004-d66e006a69fa', 'line': 1, 'pos': 14, 'type': 'COMPILATION'}} + data: None, + code: Some(status_code.as_u16().to_string()), + }); + (status_code, body).into_response() +} diff --git a/crates/runtime/src/http/dbt/handlers.rs b/crates/api-snowflake-rest/src/handlers.rs similarity index 95% rename from crates/runtime/src/http/dbt/handlers.rs rename to 
crates/api-snowflake-rest/src/handlers.rs index 214e3473b..8b6bb9a40 100644 --- a/crates/runtime/src/http/dbt/handlers.rs +++ b/crates/api-snowflake-rest/src/handlers.rs @@ -1,23 +1,22 @@ -use super::error::{self as dbt_error, DbtError, DbtResult}; -use crate::execution::query::QueryContext; -use crate::execution::utils::DataSerializationFormat; -use crate::http::dbt::schemas::{ +use crate::error::{self as dbt_error, DbtError, DbtResult}; +use crate::schemas::{ JsonResponse, LoginData, LoginRequestBody, LoginRequestQuery, LoginResponse, QueryRequest, QueryRequestBody, ResponseData, }; -use crate::http::session::DFSessionId; -use crate::http::state::AppState; +use crate::state::AppState; +use api_sessions::DFSessionId; +use axum::Json; use axum::body::Bytes; use axum::extract::{Query, State}; use axum::http::HeaderMap; -use axum::Json; use base64; use base64::engine::general_purpose::STANDARD as engine_base64; use base64::prelude::*; -use datafusion::arrow::ipc::writer::{IpcWriteOptions, StreamWriter}; +use core_executor::{query::QueryContext, utils::DataSerializationFormat}; use datafusion::arrow::ipc::MetadataVersion; -use datafusion::arrow::json::writer::JsonArray; +use datafusion::arrow::ipc::writer::{IpcWriteOptions, StreamWriter}; use datafusion::arrow::json::WriterBuilder; +use datafusion::arrow::json::writer::JsonArray; use datafusion::arrow::record_batch::RecordBatch; use flate2::read::GzDecoder; use regex::Regex; diff --git a/crates/runtime/src/http/catalog/mod.rs b/crates/api-snowflake-rest/src/lib.rs similarity index 62% rename from crates/runtime/src/http/catalog/mod.rs rename to crates/api-snowflake-rest/src/lib.rs index a9a9f7926..e89367a8a 100644 --- a/crates/runtime/src/http/catalog/mod.rs +++ b/crates/api-snowflake-rest/src/lib.rs @@ -1,3 +1,5 @@ +pub mod error; pub mod handlers; pub mod router; pub mod schemas; +pub mod state; diff --git a/crates/runtime/src/http/dbt/router.rs b/crates/api-snowflake-rest/src/router.rs similarity index 75% rename from crates/runtime/src/http/dbt/router.rs rename to crates/api-snowflake-rest/src/router.rs index f809bb20d..77d0d398d 100644 --- a/crates/runtime/src/http/dbt/router.rs +++ b/crates/api-snowflake-rest/src/router.rs @@ -1,8 +1,7 @@ -use crate::http::state::AppState; -use axum::routing::post; +use crate::handlers::{abort, login, query}; +use crate::state::AppState; use axum::Router; - -use crate::http::dbt::handlers::{abort, login, query}; +use axum::routing::post; pub fn create_router() -> Router { Router::new() diff --git a/crates/runtime/src/http/dbt/schemas.rs b/crates/api-snowflake-rest/src/schemas.rs similarity index 98% rename from crates/runtime/src/http/dbt/schemas.rs rename to crates/api-snowflake-rest/src/schemas.rs index 618ec3e63..278909379 100644 --- a/crates/runtime/src/http/dbt/schemas.rs +++ b/crates/api-snowflake-rest/src/schemas.rs @@ -1,5 +1,5 @@ use super::error::{self as dbt_error, DbtResult}; -use crate::execution::models::ColumnInfo as ColumnInfoModel; +use core_executor::models::ColumnInfo as ColumnInfoModel; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; use snafu::ResultExt; diff --git a/crates/api-snowflake-rest/src/state.rs b/crates/api-snowflake-rest/src/state.rs new file mode 100644 index 000000000..810227da8 --- /dev/null +++ b/crates/api-snowflake-rest/src/state.rs @@ -0,0 +1,7 @@ +use core_executor::service::ExecutionService; +use std::sync::Arc; + +#[derive(Clone)] +pub struct AppState { + pub execution_svc: Arc, +} diff --git a/crates/api-ui/Cargo.toml b/crates/api-ui/Cargo.toml 
new file mode 100644 index 000000000..5c7a7fdd5 --- /dev/null +++ b/crates/api-ui/Cargo.toml @@ -0,0 +1,38 @@ +[package] +name = "api-ui" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +api-sessions = { path = "../api-sessions" } +core-metastore = { path = "../core-metastore" } +core-utils = { path = "../core-utils" } +core-executor = { path = "../core-executor" } +core-history = { path = "../core-history" } + +axum = { workspace = true } +chrono = { workspace = true } +datafusion = { workspace = true } +indexmap = { workspace = true } +jsonwebtoken = { workspace = true } +http = { workspace = true } +mime_guess = "2" +serde = { workspace = true } +serde_json = { workspace = true } +snafu = { workspace = true } +tar = { workspace = true } +tokio = { workspace = true } +tracing = { workspace = true } +tower-http = { workspace = true } +tower-sessions = { workspace = true } +time = { workspace = true } +utoipa = { workspace = true } +uuid = { workspace = true } +validator = { workspace = true } + +[dev-dependencies] +reqwest = "0.12.14" + +[lints] +workspace = true diff --git a/crates/runtime/src/http/auth/error.rs b/crates/api-ui/src/auth/error.rs similarity index 98% rename from crates/runtime/src/http/auth/error.rs rename to crates/api-ui/src/auth/error.rs index a616fab5b..03ce2ac19 100644 --- a/crates/runtime/src/http/auth/error.rs +++ b/crates/api-ui/src/auth/error.rs @@ -1,8 +1,8 @@ -use axum::{http, response::IntoResponse, Json}; +use axum::{Json, http, response::IntoResponse}; +use http::HeaderValue; use http::header; use http::header::InvalidHeaderValue; -use http::HeaderValue; -use http::{header::MaxSizeReached, StatusCode}; +use http::{StatusCode, header::MaxSizeReached}; use jsonwebtoken::errors::{Error as JwtError, ErrorKind as JwtErrorKind}; use serde::{Deserialize, Serialize}; use snafu::prelude::*; diff --git a/crates/runtime/src/http/auth/handlers.rs b/crates/api-ui/src/auth/handlers.rs similarity index 97% rename from crates/runtime/src/http/auth/handlers.rs rename to crates/api-ui/src/auth/handlers.rs index f79bd61fc..4ffad8065 100644 --- a/crates/runtime/src/http/auth/handlers.rs +++ b/crates/api-ui/src/auth/handlers.rs @@ -1,21 +1,21 @@ -use crate::http::auth::models::{AccountResponse, RefreshTokenResponse}; +use crate::auth::models::{AccountResponse, RefreshTokenResponse}; use std::collections::HashMap; use super::error::AuthErrorResponse; use super::error::CreateJwtSnafu; -use crate::http::auth::error::{ +use crate::auth::error::{ AuthError, AuthResult, BadRefreshTokenSnafu, ResponseHeaderSnafu, SetCookieSnafu, TokenErrorKind, }; -use crate::http::auth::models::{AuthResponse, Claims, LoginPayload}; -use crate::http::state::AppState; +use crate::auth::models::{AuthResponse, Claims, LoginPayload}; +use crate::state::AppState; +use axum::Json; use axum::extract::State; use axum::response::IntoResponse; -use axum::Json; use chrono::offset::Local; -use http::header::SET_COOKIE; use http::HeaderMap; -use jsonwebtoken::{decode, encode, DecodingKey, EncodingKey, Header, Validation}; +use http::header::SET_COOKIE; +use jsonwebtoken::{DecodingKey, EncodingKey, Header, Validation, decode, encode}; use serde::Serialize; use snafu::ResultExt; use time::Duration; diff --git a/crates/runtime/src/http/auth/layer.rs b/crates/api-ui/src/auth/layer.rs similarity index 97% rename from crates/runtime/src/http/auth/layer.rs rename to crates/api-ui/src/auth/layer.rs index c5cb9008c..98b5db856 100644 --- a/crates/runtime/src/http/auth/layer.rs +++ 
b/crates/api-ui/src/auth/layer.rs @@ -1,6 +1,6 @@ use super::error::{AuthError, AuthResult, BadAuthTokenSnafu}; use super::handlers::get_claims_validate_jwt_token; -use crate::http::state::AppState; +use crate::state::AppState; use axum::{ extract::{Request, State}, middleware::Next, diff --git a/crates/runtime/src/http/auth/mod.rs b/crates/api-ui/src/auth/mod.rs similarity index 100% rename from crates/runtime/src/http/auth/mod.rs rename to crates/api-ui/src/auth/mod.rs diff --git a/crates/runtime/src/http/auth/models.rs b/crates/api-ui/src/auth/models.rs similarity index 100% rename from crates/runtime/src/http/auth/models.rs rename to crates/api-ui/src/auth/models.rs diff --git a/crates/runtime/src/http/auth/router.rs b/crates/api-ui/src/auth/router.rs similarity index 55% rename from crates/runtime/src/http/auth/router.rs rename to crates/api-ui/src/auth/router.rs index 09ea456eb..7231d6379 100644 --- a/crates/runtime/src/http/auth/router.rs +++ b/crates/api-ui/src/auth/router.rs @@ -1,13 +1,13 @@ -use crate::http::state::AppState; -use axum::routing::{get, post}; +use crate::state::AppState; use axum::Router; +use axum::routing::{get, post}; use super::handlers::{account, login, logout, refresh_access_token}; pub fn create_router() -> Router { Router::new() - .route("/auth/login", post(login)) - .route("/auth/refresh", post(refresh_access_token)) - .route("/auth/logout", post(logout)) + .route("/login", post(login)) + .route("/refresh", post(refresh_access_token)) + .route("/logout", post(logout)) .route("/account", get(account)) } diff --git a/crates/runtime/src/config.rs b/crates/api-ui/src/config.rs similarity index 71% rename from crates/runtime/src/config.rs rename to crates/api-ui/src/config.rs index 1f1306c94..1557b7fc3 100644 --- a/crates/runtime/src/config.rs +++ b/crates/api-ui/src/config.rs @@ -1,20 +1,11 @@ -#![allow(clippy::missing_const_for_fn)] - +pub use crate::web_assets::config::StaticWebConfig; use serde::{Deserialize, Serialize}; -use crate::http::config::WebConfig; -use crate::http::web_assets::config::StaticWebConfig; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct RuntimeConfig { - pub web: WebConfig, - pub web_assets: StaticWebConfig, - pub db: DbConfig, -} - #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct DbConfig { - pub slatedb_prefix: String, +pub struct WebConfig { + pub host: String, + pub port: u16, + pub allow_origin: Option, } // Non serializable, no Clone, Copy, Debug traits diff --git a/crates/runtime/src/http/ui/dashboard/error.rs b/crates/api-ui/src/dashboard/error.rs similarity index 84% rename from crates/runtime/src/http/ui/dashboard/error.rs rename to crates/api-ui/src/dashboard/error.rs index adf3f83c1..5f619c8c8 100644 --- a/crates/runtime/src/http/ui/dashboard/error.rs +++ b/crates/api-ui/src/dashboard/error.rs @@ -1,9 +1,9 @@ -use crate::http::error::ErrorResponse; -use crate::http::ui::error::IntoStatusCode; -use crate::http::ui::queries::error::QueryError; -use axum::response::IntoResponse; +use crate::error::ErrorResponse; +use crate::error::IntoStatusCode; +use crate::queries::error::QueryError; use axum::Json; -use embucket_metastore::error::MetastoreError; +use axum::response::IntoResponse; +use core_metastore::error::MetastoreError; use http::StatusCode; use snafu::prelude::*; diff --git a/crates/runtime/src/http/ui/dashboard/handlers.rs b/crates/api-ui/src/dashboard/handlers.rs similarity index 85% rename from crates/runtime/src/http/ui/dashboard/handlers.rs rename to 
crates/api-ui/src/dashboard/handlers.rs index 98bd13202..f010e5366 100644 --- a/crates/runtime/src/http/ui/dashboard/handlers.rs +++ b/crates/api-ui/src/dashboard/handlers.rs @@ -1,12 +1,12 @@ -use crate::http::error::ErrorResponse; -use crate::http::state::AppState; -use crate::http::ui::dashboard::error::{DashboardAPIError, DashboardResult}; -use crate::http::ui::dashboard::models::{Dashboard, DashboardResponse}; -use crate::http::ui::queries::error::QueryError; -use axum::{extract::State, Json}; -use embucket_history::GetQueries; -use embucket_metastore::error::MetastoreError; -use embucket_utils::scan_iterator::ScanIterator; +use crate::dashboard::error::{DashboardAPIError, DashboardResult}; +use crate::dashboard::models::{Dashboard, DashboardResponse}; +use crate::error::ErrorResponse; +use crate::queries::error::QueryError; +use crate::state::AppState; +use axum::{Json, extract::State}; +use core_history::GetQueries; +use core_metastore::error::MetastoreError; +use core_utils::scan_iterator::ScanIterator; use utoipa::OpenApi; #[derive(OpenApi)] diff --git a/crates/runtime/src/http/ui/dashboard/mod.rs b/crates/api-ui/src/dashboard/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/dashboard/mod.rs rename to crates/api-ui/src/dashboard/mod.rs diff --git a/crates/runtime/src/http/ui/dashboard/models.rs b/crates/api-ui/src/dashboard/models.rs similarity index 100% rename from crates/runtime/src/http/ui/dashboard/models.rs rename to crates/api-ui/src/dashboard/models.rs diff --git a/crates/runtime/src/http/ui/databases/error.rs b/crates/api-ui/src/databases/error.rs similarity index 94% rename from crates/runtime/src/http/ui/databases/error.rs rename to crates/api-ui/src/databases/error.rs index 1c9e3e678..afa8e4b1c 100644 --- a/crates/runtime/src/http/ui/databases/error.rs +++ b/crates/api-ui/src/databases/error.rs @@ -1,8 +1,8 @@ -use crate::http::error::ErrorResponse; -use crate::http::ui::error::IntoStatusCode; -use axum::response::IntoResponse; +use crate::error::ErrorResponse; +use crate::error::IntoStatusCode; use axum::Json; -use embucket_metastore::error::MetastoreError; +use axum::response::IntoResponse; +use core_metastore::error::MetastoreError; use http::StatusCode; use snafu::prelude::*; diff --git a/crates/runtime/src/http/ui/databases/handlers.rs b/crates/api-ui/src/databases/handlers.rs similarity index 94% rename from crates/runtime/src/http/ui/databases/handlers.rs rename to crates/api-ui/src/databases/handlers.rs index c876b9806..1d77507e7 100644 --- a/crates/runtime/src/http/ui/databases/handlers.rs +++ b/crates/api-ui/src/databases/handlers.rs @@ -1,21 +1,20 @@ -use crate::http::state::AppState; -use crate::http::ui::databases::models::DatabasesParameters; -use crate::http::{ - error::ErrorResponse, - metastore::handlers::QueryParameters, - ui::databases::error::{DatabasesAPIError, DatabasesResult}, - ui::databases::models::{ +use crate::databases::models::DatabasesParameters; +use crate::state::AppState; +use crate::{ + databases::error::{DatabasesAPIError, DatabasesResult}, + databases::models::{ Database, DatabaseCreatePayload, DatabaseCreateResponse, DatabaseResponse, DatabaseUpdatePayload, DatabaseUpdateResponse, DatabasesResponse, }, + error::ErrorResponse, }; use axum::{ - extract::{Path, Query, State}, Json, + extract::{Path, Query, State}, }; -use embucket_metastore::error::MetastoreError; -use embucket_metastore::Database as MetastoreDatabase; -use embucket_utils::scan_iterator::ScanIterator; +use core_metastore::Database as 
MetastoreDatabase; +use core_metastore::error::MetastoreError; +use core_utils::scan_iterator::ScanIterator; use utoipa::OpenApi; use validator::Validate; @@ -44,6 +43,12 @@ use validator::Validate; )] pub struct ApiDoc; +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct QueryParameters { + #[serde(default)] + pub cascade: Option, +} + #[utoipa::path( post, operation_id = "createDatabase", diff --git a/crates/runtime/src/http/ui/databases/mod.rs b/crates/api-ui/src/databases/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/databases/mod.rs rename to crates/api-ui/src/databases/mod.rs diff --git a/crates/runtime/src/http/ui/databases/models.rs b/crates/api-ui/src/databases/models.rs similarity index 95% rename from crates/runtime/src/http/ui/databases/models.rs rename to crates/api-ui/src/databases/models.rs index dd9898bf9..b13b4e0bc 100644 --- a/crates/runtime/src/http/ui/databases/models.rs +++ b/crates/api-ui/src/databases/models.rs @@ -1,5 +1,5 @@ -use crate::http::ui::default_limit; -use embucket_metastore::models::Database as MetastoreDatabase; +use crate::default_limit; +use core_metastore::models::Database as MetastoreDatabase; use serde::{Deserialize, Serialize}; use utoipa::{IntoParams, ToSchema}; diff --git a/crates/api-ui/src/error.rs b/crates/api-ui/src/error.rs new file mode 100644 index 000000000..f848d2671 --- /dev/null +++ b/crates/api-ui/src/error.rs @@ -0,0 +1,171 @@ +use axum::Json; +use axum::{response::IntoResponse, response::Response}; +use core_executor::error::ExecutionError; +use core_metastore::error::MetastoreError; +use http::StatusCode; +use serde::{Deserialize, Serialize}; +use snafu::prelude::*; + +#[derive(Snafu, Debug)] +#[snafu(visibility(pub))] +pub enum UIError { + #[snafu(transparent)] + Execution { source: ExecutionError }, + #[snafu(transparent)] + Metastore { source: MetastoreError }, +} +pub type UIResult = Result; + +pub(crate) trait IntoStatusCode { + fn status_code(&self) -> StatusCode; +} + +// #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +// #[serde(rename_all = "camelCase")] +// pub(crate) struct UIResponse { +// #[serde(flatten)] +// pub(crate) data: T, +// } +// +// impl UIResponse { +// pub const fn from(data: T) -> Json { +// Json(Self { data }) +// } +// } + +#[derive(Debug, Serialize, Deserialize, utoipa::ToSchema)] +pub struct ErrorResponse { + pub message: String, + pub status_code: u16, +} + +impl IntoResponse for UIError { + fn into_response(self) -> Response { + match self { + Self::Execution { source } => exec_error_into_response(&source), + Self::Metastore { source } => metastore_error_into_response(&source), + } + } +} + +fn metastore_error_into_response(error: &MetastoreError) -> axum::response::Response { + let message = error.to_string(); + let code = match error { + MetastoreError::TableDataExists { .. } + | MetastoreError::ObjectAlreadyExists { .. } + | MetastoreError::VolumeAlreadyExists { .. } + | MetastoreError::DatabaseAlreadyExists { .. } + | MetastoreError::SchemaAlreadyExists { .. } + | MetastoreError::TableAlreadyExists { .. } + | MetastoreError::VolumeInUse { .. } => http::StatusCode::CONFLICT, + MetastoreError::TableRequirementFailed { .. } => http::StatusCode::UNPROCESSABLE_ENTITY, + MetastoreError::VolumeValidationFailed { .. } + | MetastoreError::VolumeMissingCredentials + | MetastoreError::Validation { .. } => http::StatusCode::BAD_REQUEST, + MetastoreError::CloudProviderNotImplemented { .. 
} => http::StatusCode::PRECONDITION_FAILED, + MetastoreError::VolumeNotFound { .. } + | MetastoreError::DatabaseNotFound { .. } + | MetastoreError::SchemaNotFound { .. } + | MetastoreError::TableNotFound { .. } + | MetastoreError::ObjectNotFound => http::StatusCode::NOT_FOUND, + MetastoreError::ObjectStore { .. } + | MetastoreError::ObjectStorePath { .. } + | MetastoreError::CreateDirectory { .. } + | MetastoreError::SlateDB { .. } + | MetastoreError::UtilSlateDB { .. } + | MetastoreError::Iceberg { .. } + | MetastoreError::Serde { .. } + | MetastoreError::TableMetadataBuilder { .. } + | MetastoreError::TableObjectStoreNotFound { .. } + | MetastoreError::UrlParse { .. } => http::StatusCode::INTERNAL_SERVER_ERROR, + }; + + let error = ErrorResponse { + message, + status_code: code.as_u16(), + }; + (code, Json(error)).into_response() +} + +fn exec_error_into_response(error: &ExecutionError) -> axum::response::Response { + let status_code = match error { + ExecutionError::RegisterUDF { .. } + | ExecutionError::RegisterUDAF { .. } + | ExecutionError::InvalidTableIdentifier { .. } + | ExecutionError::InvalidSchemaIdentifier { .. } + | ExecutionError::InvalidFilePath { .. } + | ExecutionError::InvalidBucketIdentifier { .. } + | ExecutionError::TableProviderNotFound { .. } + | ExecutionError::MissingDataFusionSession { .. } + | ExecutionError::Utf8 { .. } + | ExecutionError::VolumeNotFound { .. } + | ExecutionError::ObjectStore { .. } + | ExecutionError::ObjectAlreadyExists { .. } + | ExecutionError::UnsupportedFileFormat { .. } + | ExecutionError::RefreshCatalogList { .. } + | ExecutionError::UrlParse { .. } + | ExecutionError::JobError { .. } + | ExecutionError::UploadFailed { .. } => http::StatusCode::BAD_REQUEST, + ExecutionError::Arrow { .. } + | ExecutionError::S3Tables { .. } + | ExecutionError::Iceberg { .. } + | ExecutionError::CatalogListDowncast + | ExecutionError::CatalogDownCast { .. } + | ExecutionError::RegisterCatalog { .. } => http::StatusCode::INTERNAL_SERVER_ERROR, + ExecutionError::DatabaseNotFound { .. } + | ExecutionError::TableNotFound { .. } + | ExecutionError::SchemaNotFound { .. } + | ExecutionError::CatalogNotFound { .. } + | ExecutionError::Metastore { .. } + | ExecutionError::DataFusion { .. } + | ExecutionError::DataFusionQuery { .. 
} => http::StatusCode::OK, + }; + + let message = match &error { + ExecutionError::DataFusion { source } => format!("DataFusion error: {source}"), + ExecutionError::DataFusionQuery { source, query } => { + format!("DataFusion error: {source}, query: {query}") + } + ExecutionError::InvalidTableIdentifier { ident } => { + format!("Invalid table identifier: {ident}") + } + ExecutionError::InvalidSchemaIdentifier { ident } => { + format!("Invalid schema identifier: {ident}") + } + ExecutionError::InvalidFilePath { path } => format!("Invalid file path: {path}"), + ExecutionError::InvalidBucketIdentifier { ident } => { + format!("Invalid bucket identifier: {ident}") + } + ExecutionError::Arrow { source } => format!("Arrow error: {source}"), + ExecutionError::TableProviderNotFound { table_name } => { + format!("No Table Provider found for table: {table_name}") + } + ExecutionError::MissingDataFusionSession { id } => { + format!("Missing DataFusion session for id: {id}") + } + ExecutionError::Utf8 { source } => format!("Error encoding UTF8 string: {source}"), + ExecutionError::Metastore { source } => format!("Metastore error: {source}"), + ExecutionError::DatabaseNotFound { db } => format!("Database not found: {db}"), + ExecutionError::TableNotFound { table } => format!("Table not found: {table}"), + ExecutionError::SchemaNotFound { schema } => format!("Schema not found: {schema}"), + ExecutionError::VolumeNotFound { volume } => format!("Volume not found: {volume}"), + ExecutionError::ObjectStore { source } => format!("Object store error: {source}"), + ExecutionError::ObjectAlreadyExists { type_name, name } => { + format!("Object of type {type_name} with name {name} already exists") + } + ExecutionError::UnsupportedFileFormat { format } => { + format!("Unsupported file format {format}") + } + ExecutionError::RefreshCatalogList { source } => { + format!("Refresh Catalog List error: {source}") + } + _ => "Internal server error".to_string(), + }; + + // TODO: Is it correct?! 
+ let error = ErrorResponse { + message, + status_code: status_code.as_u16(), + }; + (status_code, Json(error)).into_response() +} diff --git a/crates/runtime/src/http/layers.rs b/crates/api-ui/src/layers.rs similarity index 91% rename from crates/runtime/src/http/layers.rs rename to crates/api-ui/src/layers.rs index 722e07a0c..dc12a4794 100644 --- a/crates/runtime/src/http/layers.rs +++ b/crates/api-ui/src/layers.rs @@ -7,8 +7,6 @@ use std::str::FromStr; use tower_http::cors::CorsLayer; use uuid::Uuid; -use super::error; - #[derive(Clone)] struct RequestMetadata { request_id: Uuid, @@ -48,12 +46,12 @@ pub async fn add_request_metadata( } #[allow(clippy::needless_pass_by_value, clippy::expect_used)] -pub fn make_cors_middleware(origin: &str) -> Result { +pub fn make_cors_middleware(origin: &str) -> CorsLayer { #[allow(clippy::expect_fun_call)] let origin_value = origin .parse::<HeaderValue>() .expect(&format!("Failed to parse origin value: {origin}")); - Ok(CorsLayer::new() + CorsLayer::new() .allow_origin(origin_value) .allow_methods(vec![ Method::GET, @@ -64,5 +62,5 @@ pub fn make_cors_middleware(origin: &str) -> Result diff --git a/crates/runtime/src/http/ui/queries/error.rs b/crates/api-ui/src/queries/error.rs pub type QueriesResult<T> = Result<T, QueriesAPIError>; pub enum QueryError { #[snafu(transparent)] Execution { - source: crate::execution::error::ExecutionError, + source: core_executor::error::ExecutionError, }, #[snafu(transparent)] Store { source: WorksheetsStoreError }, #[snafu(display("Failed to parse row JSON: {source}"))] ResultParse { source: serde_json::Error }, #[snafu(display("ResultSet create error: {source}"))] - CreateResultSet { source: arrow::error::ArrowError }, + CreateResultSet { + source: datafusion::arrow::error::ArrowError, + }, #[snafu(display("Error encoding UTF8 string: {source}"))] Utf8 { source: std::string::FromUtf8Error }, } diff --git a/crates/runtime/src/http/ui/queries/handlers.rs b/crates/api-ui/src/queries/handlers.rs similarity index 92% rename from crates/runtime/src/http/ui/queries/handlers.rs rename to crates/api-ui/src/queries/handlers.rs index dc8738a1d..681c985c9 100644 --- a/crates/runtime/src/http/ui/queries/handlers.rs +++ b/crates/api-ui/src/queries/handlers.rs @@ -1,20 +1,20 @@ -use crate::execution::query::QueryContext; -use crate::http::session::DFSessionId; -use crate::http::state::AppState; -use crate::http::ui::queries::models::{ +use crate::queries::models::{ GetQueriesParams, QueriesResponse, QueryCreatePayload, QueryCreateResponse, QueryRecord, ResultSet, }; -use crate::http::{ +use crate::state::AppState; +use crate::{ error::ErrorResponse, - ui::queries::error::{QueriesAPIError, QueriesResult, QueryError}, + queries::error::{QueriesAPIError, QueriesResult, QueryError}, }; +use api_sessions::DFSessionId; use axum::{ - extract::{Query, State}, Json, + extract::{Query, State}, }; -use embucket_history::{QueryRecordActions, QueryRecordId, WorksheetId}; -use embucket_utils::iterable::IterableEntity; +use core_executor::query::QueryContext; +use core_history::{QueryRecordActions, QueryRecordId, WorksheetId}; +use core_utils::iterable::IterableEntity; use std::collections::HashMap; use utoipa::OpenApi; @@ -96,7 +96,7 @@ pub async fn query( //TODO: map to result correctly without using duplicate code let mut query_record = - embucket_history::QueryRecord::query_start(&payload.query, payload.worksheet_id); + core_history::QueryRecord::query_start(&payload.query, payload.worksheet_id); let query_res = state .execution_svc .query(&session_id, &payload.query, query_context) @@ -185,7 +185,7 @@ pub async fn queries( let next_cursor = if let Some(last_item) = recs.last() { 
last_item.next_cursor() } else { - embucket_history::QueryRecord::min_cursor() // no items in range -> go to beginning + core_history::QueryRecord::min_cursor() // no items in range -> go to beginning }; let queries: Vec<QueryRecord> = recs .clone() diff --git a/crates/runtime/src/http/ui/queries/mod.rs b/crates/api-ui/src/queries/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/queries/mod.rs rename to crates/api-ui/src/queries/mod.rs diff --git a/crates/runtime/src/http/ui/queries/models.rs b/crates/api-ui/src/queries/models.rs similarity index 90% rename from crates/runtime/src/http/ui/queries/models.rs rename to crates/api-ui/src/queries/models.rs index e098b27d5..cfe84ca94 100644 --- a/crates/runtime/src/http/ui/queries/models.rs +++ b/crates/api-ui/src/queries/models.rs @@ -1,12 +1,12 @@ use super::error::{ CreateResultSetSnafu, QueryError, QueryRecordResult, ResultParseSnafu, Utf8Snafu, }; -use crate::execution::models::ColumnInfo; -use crate::http::ui::default_limit; use chrono::{DateTime, Utc}; +use crate::default_limit; +use core_executor::models::ColumnInfo; +use core_history::{QueryRecordId, QueryStatus as QueryStatusItem, WorksheetId}; use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::json::{writer::JsonArray, WriterBuilder}; -use embucket_history::{QueryRecordId, QueryStatus as QueryStatusItem, WorksheetId}; +use datafusion::arrow::json::{WriterBuilder, writer::JsonArray}; use indexmap::IndexMap; use serde::{Deserialize, Serialize}; use serde_json::Value; @@ -14,7 +14,7 @@ use snafu::ResultExt; use std::collections::HashMap; use utoipa::ToSchema; -pub type ExecutionContext = crate::execution::query::QueryContext; +pub type ExecutionContext = core_executor::query::QueryContext; #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] #[serde(rename_all = "camelCase")] @@ -131,10 +131,10 @@ pub struct QueryRecord { pub error: String, // empty error - ok } -impl TryFrom<embucket_history::QueryRecord> for QueryRecord { +impl TryFrom<core_history::QueryRecord> for QueryRecord { type Error = QueryError; - fn try_from(query: embucket_history::QueryRecord) -> QueryRecordResult { + fn try_from(query: core_history::QueryRecord) -> QueryRecordResult { let query_result = query.result.unwrap_or_default(); let query_error = query.error.unwrap_or_default(); let result_set = if query_result.is_empty() { @@ -180,9 +180,9 @@ pub struct GetQueriesParams { } #[allow(clippy::from_over_into)] -impl Into<embucket_history::GetQueries> for GetQueriesParams { - fn into(self) -> embucket_history::GetQueries { - embucket_history::GetQueries { +impl Into<core_history::GetQueries> for GetQueriesParams { + fn into(self) -> core_history::GetQueries { + core_history::GetQueries { worksheet_id: self.worksheet_id, sql_text: self.sql_text, min_duration_ms: self.min_duration_ms, diff --git a/crates/runtime/src/http/ui/router.rs b/crates/api-ui/src/router.rs similarity index 74% rename from crates/runtime/src/http/ui/router.rs rename to crates/api-ui/src/router.rs index d500c340e..79e838d9d 100644 --- a/crates/runtime/src/http/ui/router.rs +++ b/crates/api-ui/src/router.rs @@ -1,40 +1,34 @@ -use crate::http::layers::add_request_metadata; -use crate::http::ui::databases::handlers::{ +use crate::databases::handlers::ApiDoc as DatabasesApiDoc; +use crate::databases::handlers::{ create_database, delete_database, get_database, list_databases, update_database, }; -use crate::http::ui::schemas::handlers::{create_schema, delete_schema, list_schemas}; -use crate::http::ui::volumes::handlers::{ - create_volume, delete_volume, get_volume, list_volumes, update_volume, +use 
crate::layers::add_request_metadata; +use crate::navigation_trees::handlers::{ + ApiDoc as DatabasesNavigationApiDoc, get_navigation_trees, }; -use crate::http::ui::worksheets::handlers::{ - create_worksheet, delete_worksheet, update_worksheet, worksheet, worksheets, +use crate::queries::handlers::ApiDoc as QueryApiDoc; +use crate::queries::handlers::{queries, query}; +use crate::schemas::handlers::ApiDoc as SchemasApiDoc; +use crate::schemas::handlers::{create_schema, delete_schema, list_schemas}; +use crate::tables::handlers::{ + ApiDoc as TableApiDoc, get_table_columns, get_table_preview_data, get_table_statistics, + get_tables, upload_file, }; - -use crate::http::ui::queries::handlers::{queries, query}; -// use crate::http::ui::old_handlers::tables::{ -// create_table, delete_table, get_settings, get_snapshots, get_table, register_table, -// update_table_properties, -// }; - -use crate::http::ui::databases::handlers::ApiDoc as DatabasesApiDoc; -use crate::http::ui::navigation_trees::handlers::{ - get_navigation_trees, ApiDoc as DatabasesNavigationApiDoc, +use crate::volumes::handlers::ApiDoc as VolumesApiDoc; +use crate::volumes::handlers::{ + create_volume, delete_volume, get_volume, list_volumes, update_volume, }; -use crate::http::ui::queries::handlers::ApiDoc as QueryApiDoc; -use crate::http::ui::schemas::handlers::ApiDoc as SchemasApiDoc; -use crate::http::ui::tables::handlers::{ - get_table_columns, get_table_preview_data, get_table_statistics, get_tables, upload_file, - ApiDoc as TableApiDoc, +use crate::worksheets::handlers::ApiDoc as WorksheetsApiDoc; +use crate::worksheets::handlers::{ + create_worksheet, delete_worksheet, update_worksheet, worksheet, worksheets, }; -use crate::http::ui::volumes::handlers::ApiDoc as VolumesApiDoc; -use crate::http::ui::worksheets::handlers::ApiDoc as WorksheetsApiDoc; -use crate::http::auth::handlers::ApiDoc as AuthApiDoc; -use crate::http::state::AppState; -use crate::http::ui::dashboard::handlers::{get_dashboard, ApiDoc as DashboardApiDoc}; +use crate::auth::handlers::ApiDoc as AuthApiDoc; +use crate::dashboard::handlers::{ApiDoc as DashboardApiDoc, get_dashboard}; +use crate::state::AppState; +use axum::Router; use axum::extract::DefaultBodyLimit; use axum::routing::{delete, get, post}; -use axum::Router; use tower_http::sensitive_headers::SetSensitiveHeadersLayer; use utoipa::OpenApi; @@ -70,7 +64,7 @@ } pub fn ui_open_api_spec() -> utoipa::openapi::OpenApi { } pub fn create_router() -> Router<AppState> { - let ui_router = Router::new() + Router::new() // .route("/navigation_trees", get(navigation_trees)) .route("/navigation-trees", get(get_navigation_trees)) .route("/dashboard", get(get_dashboard)) @@ -142,7 +136,5 @@ pub fn create_router() -> Router<AppState> { axum::http::header::AUTHORIZATION, ])) .layer(axum::middleware::from_fn(add_request_metadata)) - .layer(DefaultBodyLimit::disable()); - - Router::new().nest("/ui", ui_router) + .layer(DefaultBodyLimit::disable()) } diff --git a/crates/runtime/src/http/ui/schemas/error.rs b/crates/api-ui/src/schemas/error.rs similarity index 93% rename from crates/runtime/src/http/ui/schemas/error.rs rename to crates/api-ui/src/schemas/error.rs index 7ab80ba7c..8973bdf75 100644 --- a/crates/runtime/src/http/ui/schemas/error.rs +++ b/crates/api-ui/src/schemas/error.rs @@ -1,9 +1,9 @@ -use crate::execution::error::ExecutionError; -use crate::http::error::ErrorResponse; -use crate::http::ui::error::IntoStatusCode; -use axum::response::IntoResponse; +use crate::error::ErrorResponse; +use crate::error::IntoStatusCode; use 
axum::Json; -use embucket_metastore::error::MetastoreError; +use axum::response::IntoResponse; +use core_executor::error::ExecutionError; +use core_metastore::error::MetastoreError; use http::StatusCode; use snafu::prelude::*; diff --git a/crates/runtime/src/http/ui/schemas/handlers.rs b/crates/api-ui/src/schemas/handlers.rs similarity index 95% rename from crates/runtime/src/http/ui/schemas/handlers.rs rename to crates/api-ui/src/schemas/handlers.rs index b8eaee6c4..7de962137 100644 --- a/crates/runtime/src/http/ui/schemas/handlers.rs +++ b/crates/api-ui/src/schemas/handlers.rs @@ -1,22 +1,22 @@ -use crate::execution::query::QueryContext; -use crate::http::session::DFSessionId; -use crate::http::state::AppState; -use crate::http::ui::schemas::models::SchemasParameters; -use crate::http::{ +use crate::schemas::models::SchemasParameters; +use crate::state::AppState; +use crate::{ error::ErrorResponse, - ui::schemas::error::{SchemasAPIError, SchemasResult}, - ui::schemas::models::{ + schemas::error::{SchemasAPIError, SchemasResult}, + schemas::models::{ Schema, SchemaCreatePayload, SchemaCreateResponse, SchemaResponse, SchemaUpdatePayload, SchemaUpdateResponse, SchemasResponse, }, }; +use api_sessions::DFSessionId; use axum::{ - extract::{Path, Query, State}, Json, + extract::{Path, Query, State}, }; -use embucket_metastore::error::MetastoreError; -use embucket_metastore::models::SchemaIdent as MetastoreSchemaIdent; -use embucket_utils::scan_iterator::ScanIterator; +use core_executor::query::QueryContext; +use core_metastore::error::MetastoreError; +use core_metastore::models::SchemaIdent as MetastoreSchemaIdent; +use core_utils::scan_iterator::ScanIterator; use std::convert::From; use std::convert::Into; use utoipa::OpenApi; diff --git a/crates/runtime/src/http/ui/schemas/mod.rs b/crates/api-ui/src/schemas/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/schemas/mod.rs rename to crates/api-ui/src/schemas/mod.rs diff --git a/crates/runtime/src/http/ui/schemas/models.rs b/crates/api-ui/src/schemas/models.rs similarity index 94% rename from crates/runtime/src/http/ui/schemas/models.rs rename to crates/api-ui/src/schemas/models.rs index 096e63c0d..44bfc94bf 100644 --- a/crates/runtime/src/http/ui/schemas/models.rs +++ b/crates/api-ui/src/schemas/models.rs @@ -1,7 +1,7 @@ -use crate::http::ui::default_limit; +use crate::default_limit; use chrono::NaiveDateTime; -use embucket_metastore::models::{Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent}; -use embucket_metastore::RwObject; +use core_metastore::RwObject; +use core_metastore::models::{Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent}; use serde::{Deserialize, Serialize}; use std::convert::From; use utoipa::{IntoParams, ToSchema}; diff --git a/crates/runtime/src/http/state.rs b/crates/api-ui/src/state.rs similarity index 85% rename from crates/runtime/src/http/state.rs rename to crates/api-ui/src/state.rs index 7e1600650..eda708032 100644 --- a/crates/runtime/src/http/state.rs +++ b/crates/api-ui/src/state.rs @@ -1,12 +1,12 @@ -use embucket_history::history_store::WorksheetsStore; -use embucket_metastore::metastore::Metastore; +use core_history::WorksheetsStore; +use core_metastore::Metastore; use std::collections::HashMap; use std::sync::Arc; use tokio::sync::Mutex; use crate::config::AuthConfig; -use crate::execution::service::ExecutionService; -use crate::http::config::WebConfig; +use crate::config::WebConfig; +use core_executor::service::ExecutionService; // Define a State struct that 
contains shared services or repositories #[derive(Clone)] diff --git a/crates/runtime/src/http/ui/tables/error.rs b/crates/api-ui/src/tables/error.rs similarity index 93% rename from crates/runtime/src/http/ui/tables/error.rs rename to crates/api-ui/src/tables/error.rs index c462fc3fd..a036b077c 100644 --- a/crates/runtime/src/http/ui/tables/error.rs +++ b/crates/api-ui/src/tables/error.rs @@ -1,10 +1,10 @@ -use crate::execution::error::ExecutionError; -use crate::http::error::ErrorResponse; -use crate::http::ui::error::IntoStatusCode; +use crate::error::ErrorResponse; +use crate::error::IntoStatusCode; +use axum::Json; use axum::extract::multipart; use axum::response::IntoResponse; -use axum::Json; -use embucket_metastore::error::MetastoreError; +use core_executor::error::ExecutionError; +use core_metastore::error::MetastoreError; use http::StatusCode; use snafu::prelude::*; @@ -23,7 +23,7 @@ pub enum TableError { FileField, #[snafu(transparent)] Execution { - source: crate::execution::error::ExecutionError, + source: core_executor::error::ExecutionError, }, #[snafu(transparent)] Metastore { source: MetastoreError }, diff --git a/crates/runtime/src/http/ui/tables/handlers.rs b/crates/api-ui/src/tables/handlers.rs similarity index 97% rename from crates/runtime/src/http/ui/tables/handlers.rs rename to crates/api-ui/src/tables/handlers.rs index 038e695f0..fbf955ba7 100644 --- a/crates/runtime/src/http/ui/tables/handlers.rs +++ b/crates/api-ui/src/tables/handlers.rs @@ -1,26 +1,26 @@ -use crate::execution::query::QueryContext; -use crate::http::error::ErrorResponse; -use crate::http::session::DFSessionId; -use crate::http::state::AppState; -use crate::http::ui::tables::error::{ +use crate::error::ErrorResponse; +use crate::state::AppState; +use crate::tables::error::{ CreateUploadSnafu, MalformedMultipartFileDataSnafu, MalformedMultipartSnafu, TableError, TablesAPIError, TablesResult, }; -use crate::http::ui::tables::models::{ +use crate::tables::models::{ Table, TableColumn, TableColumnsResponse, TablePreviewDataColumn, TablePreviewDataParameters, TablePreviewDataResponse, TablePreviewDataRow, TableStatistics, TableStatisticsResponse, TableUploadPayload, TableUploadResponse, TablesParameters, TablesResponse, UploadParameters, }; +use api_sessions::DFSessionId; use axum::extract::Query; use axum::{ - extract::{Multipart, Path, State}, Json, + extract::{Multipart, Path, State}, }; +use core_executor::query::QueryContext; +use core_metastore::error::MetastoreError; +use core_metastore::{SchemaIdent as MetastoreSchemaIdent, TableIdent as MetastoreTableIdent}; +use core_utils::scan_iterator::ScanIterator; use datafusion::arrow::csv::reader::Format; use datafusion::arrow::util::display::array_value_to_string; -use embucket_metastore::error::MetastoreError; -use embucket_metastore::{SchemaIdent as MetastoreSchemaIdent, TableIdent as MetastoreTableIdent}; -use embucket_utils::scan_iterator::ScanIterator; use snafu::ResultExt; use std::time::Instant; use utoipa::OpenApi; diff --git a/crates/runtime/src/http/ui/tables/mod.rs b/crates/api-ui/src/tables/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/tables/mod.rs rename to crates/api-ui/src/tables/mod.rs diff --git a/crates/runtime/src/http/ui/tables/models.rs b/crates/api-ui/src/tables/models.rs similarity index 99% rename from crates/runtime/src/http/ui/tables/models.rs rename to crates/api-ui/src/tables/models.rs index a387ffc5d..69445dd40 100644 --- a/crates/runtime/src/http/ui/tables/models.rs +++ 
b/crates/api-ui/src/tables/models.rs @@ -1,4 +1,4 @@ -use crate::http::ui::default_limit; +use crate::default_limit; use chrono::NaiveDateTime; use datafusion::arrow::csv::reader::Format; use serde::{Deserialize, Serialize}; diff --git a/crates/runtime/src/tests/auth.rs b/crates/api-ui/src/tests/auth.rs similarity index 76% rename from crates/runtime/src/tests/auth.rs rename to crates/api-ui/src/tests/auth.rs index d120aa443..c7b628219 100644 --- a/crates/runtime/src/tests/auth.rs +++ b/crates/api-ui/src/tests/auth.rs @@ -1,14 +1,13 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::auth::error::AuthError; -use crate::http::auth::error::*; -use crate::http::auth::handlers::{create_jwt, get_claims_validate_jwt_token, jwt_claims}; -use crate::http::auth::models::{AccountResponse, AuthResponse, LoginPayload}; -use crate::http::metastore::handlers::RwObjectVec; -use crate::http::ui::queries::models::{QueryCreatePayload, QueryCreateResponse}; -use crate::http::ui::tests::common::{http_req_with_headers, TestHttpError}; -use crate::tests::run_test_server_with_demo_auth; -use embucket_metastore::models::Volume; -use http::{header, HeaderMap, HeaderValue, Method, StatusCode}; +use crate::auth::error::AuthError; +use crate::auth::error::*; +use crate::auth::handlers::{create_jwt, get_claims_validate_jwt_token, jwt_claims}; +use crate::auth::models::{AccountResponse, AuthResponse, LoginPayload}; +use crate::queries::models::{QueryCreatePayload, QueryCreateResponse}; +use crate::tests::common::{TestHttpError, http_req_with_headers}; +use crate::tests::server::run_test_server_with_demo_auth; +use core_metastore::RwObject; +use http::{HeaderMap, HeaderValue, Method, StatusCode, header}; use serde_json::json; use std::collections::HashMap; use std::net::SocketAddr; @@ -18,6 +17,8 @@ const JWT_SECRET: &str = "test"; const DEMO_USER: &str = "demo_user"; const DEMO_PASSWORD: &str = "demo_password"; +pub type RwObjectVec<T> = Vec<RwObject<T>>; + #[allow(clippy::explicit_iter_loop)] fn get_set_cookie_from_response_headers( headers: &HeaderMap, @@ -52,7 +53,7 @@ where header::CONTENT_TYPE, HeaderValue::from_static("application/json"), )]), - &format!("http://{addr}/auth/login"), + &format!("http://{addr}/ui/auth/login"), json!(LoginPayload { username: String::from(username), password: String::from(password), @@ -76,7 +77,7 @@ where header::CONTENT_TYPE, HeaderValue::from_static("application/json"), )]), - &format!("http://{addr}/auth/logout"), + &format!("http://{addr}/ui/auth/logout"), String::new(), ) .await @@ -104,7 +105,7 @@ where .expect("Can't convert to HeaderValue"), ), ]), - &format!("http://{addr}/auth/refresh"), + &format!("http://{addr}/ui/auth/refresh"), String::new(), ) .await @@ -144,34 +145,6 @@ where .await } -async fn metastore<T>( - client: &reqwest::Client, - addr: &SocketAddr, - access_token: &String, -) -> Result<(HeaderMap, T), TestHttpError> -where - T: serde::de::DeserializeOwned, -{ - http_req_with_headers::<T>( - client, - Method::POST, - HeaderMap::from_iter(vec![ - ( - header::CONTENT_TYPE, - HeaderValue::from_static("application/json"), - ), - ( - header::AUTHORIZATION, - HeaderValue::from_str(format!("Bearer {access_token}").as_str()) - .expect("Can't convert to HeaderValue"), - ), - ]), - &format!("http://{addr}/v1/metastore/volumes"), - String::new(), - ) - .await -} - #[tokio::test] #[allow(clippy::too_many_lines)] async fn test_login_no_secret_set() { @@ -215,51 +188,6 @@ async fn test_bad_login() { ); } -#[tokio::test] -#[allow(clippy::too_many_lines)] -async fn 
test_metastore_request_unauthorized() { - let addr = run_test_server_with_demo_auth( - JWT_SECRET.to_string(), - DEMO_USER.to_string(), - DEMO_PASSWORD.to_string(), - ) - .await; - let client = reqwest::Client::new(); - - let _ = login::<()>(&client, &addr, "", "") - .await - .expect_err("Login should fail"); - - // Unauthorized error while running metastore request - let metastore_err = metastore::<()>(&client, &addr, &"xyz".to_string()) - .await - .expect_err("Metastore request should fail"); - assert_eq!(metastore_err.status, StatusCode::UNAUTHORIZED); -} - -#[tokio::test] -#[allow(clippy::too_many_lines)] -async fn test_metastore_request_passes_authorization() { - let addr = run_test_server_with_demo_auth( - JWT_SECRET.to_string(), - DEMO_USER.to_string(), - DEMO_PASSWORD.to_string(), - ) - .await; - let client = reqwest::Client::new(); - - let (_, login_response) = login::<AuthResponse>(&client, &addr, DEMO_USER, DEMO_PASSWORD) - .await - .expect("Failed to login"); - - // Metastore request not returning auth error - let metastore_res = - metastore::<RwObjectVec<Volume>>(&client, &addr, &login_response.access_token).await; - if let Err(e) = metastore_res { - assert_ne!(e.status, StatusCode::UNAUTHORIZED); - } -} - #[tokio::test] #[allow(clippy::too_many_lines)] async fn test_query_request_unauthorized() { @@ -377,18 +305,24 @@ async fn test_login_refresh() { .get("refresh_token") .expect("No Set-Cookie found with refresh_token"); - assert!(refresh_token_cookie - .to_str() - .expect("Bad cookie") - .contains("HttpOnly")); - assert!(refresh_token_cookie - .to_str() - .expect("Bad cookie") - .contains("Secure")); - assert!(refresh_token_cookie - .to_str() - .expect("Bad cookie") - .contains("SameSite=Strict")); + assert!( + refresh_token_cookie + .to_str() + .expect("Bad cookie") + .contains("HttpOnly") + ); + assert!( + refresh_token_cookie + .to_str() + .expect("Bad cookie") + .contains("Secure") + ); + assert!( + refresh_token_cookie + .to_str() + .expect("Bad cookie") + .contains("SameSite=Strict") + ); // Successfully run query let (_, query_response) = @@ -410,18 +344,24 @@ async fn test_login_refresh() { .get("refresh_token") .expect("No Set-Cookie found with refresh_token"); - assert!(refresh_token_cookie - .to_str() - .expect("Bad cookie") - .contains("HttpOnly")); - assert!(refresh_token_cookie - .to_str() - .expect("Bad cookie") - .contains("Secure")); - assert!(refresh_token_cookie - .to_str() - .expect("Bad cookie") - .contains("SameSite=Strict")); + assert!( + refresh_token_cookie + .to_str() + .expect("Bad cookie") + .contains("HttpOnly") + ); + assert!( + refresh_token_cookie + .to_str() + .expect("Bad cookie") + .contains("Secure") + ); + assert!( + refresh_token_cookie + .to_str() + .expect("Bad cookie") + .contains("SameSite=Strict") + ); } #[tokio::test] @@ -491,7 +431,7 @@ async fn test_account_ok() { .expect("Can't convert to HeaderValue"), ), ]), - &format!("http://{addr}/account"), + &format!("http://{addr}/ui/auth/account"), String::new().to_string(), ) .await @@ -521,7 +461,7 @@ async fn test_account_unauthorized() { header::CONTENT_TYPE, HeaderValue::from_static("application/json"), )]), - &format!("http://{addr}/account"), + &format!("http://{addr}/ui/auth/account"), String::new().to_string(), ) .await diff --git a/crates/runtime/src/http/ui/tests/common.rs b/crates/api-ui/src/tests/common.rs similarity index 96% rename from crates/runtime/src/http/ui/tests/common.rs rename to crates/api-ui/src/tests/common.rs index 025268494..cc8e85bf6 100644 --- a/crates/runtime/src/http/ui/tests/common.rs 
+++ b/crates/api-ui/src/tests/common.rs @@ -1,8 +1,8 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::ui::databases::models::DatabaseCreatePayload; -use crate::http::ui::volumes::models::VolumeCreatePayload; -use http::{header, HeaderMap, HeaderValue, Method, StatusCode}; +use crate::databases::models::DatabaseCreatePayload; +use crate::volumes::models::VolumeCreatePayload; +use http::{HeaderMap, HeaderValue, Method, StatusCode, header}; use serde_json::json; use std::net::SocketAddr; diff --git a/crates/runtime/src/http/ui/tests/dashboard.rs b/crates/api-ui/src/tests/dashboard.rs similarity index 88% rename from crates/runtime/src/http/ui/tests/dashboard.rs rename to crates/api-ui/src/tests/dashboard.rs index c7bf2c8be..c8e4978b9 100644 --- a/crates/runtime/src/http/ui/tests/dashboard.rs +++ b/crates/api-ui/src/tests/dashboard.rs @@ -1,16 +1,16 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::ui::dashboard::models::DashboardResponse; -use crate::http::ui::databases::models::DatabaseCreatePayload; -use crate::http::ui::queries::models::QueryCreatePayload; -use crate::http::ui::schemas::models::SchemaCreatePayload; -use crate::http::ui::tests::common::req; -use crate::http::ui::tests::common::{ui_test_op, Entity, Op}; -use crate::http::ui::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; -use crate::http::ui::worksheets::models::{WorksheetCreatePayload, WorksheetResponse}; -use crate::tests::run_test_server; -use embucket_metastore::VolumeType as MetastoreVolumeType; -use embucket_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; +use crate::dashboard::models::DashboardResponse; +use crate::databases::models::DatabaseCreatePayload; +use crate::queries::models::QueryCreatePayload; +use crate::schemas::models::SchemaCreatePayload; +use crate::tests::common::req; +use crate::tests::common::{Entity, Op, ui_test_op}; +use crate::tests::server::run_test_server; +use crate::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; +use crate::worksheets::models::{WorksheetCreatePayload, WorksheetResponse}; +use core_metastore::VolumeType as MetastoreVolumeType; +use core_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; use http::Method; use serde_json::json; diff --git a/crates/runtime/src/http/ui/tests/databases.rs b/crates/api-ui/src/tests/databases.rs similarity index 95% rename from crates/runtime/src/http/ui/tests/databases.rs rename to crates/api-ui/src/tests/databases.rs index 541e2797b..4fabd1622 100644 --- a/crates/runtime/src/http/ui/tests/databases.rs +++ b/crates/api-ui/src/tests/databases.rs @@ -1,14 +1,12 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::error::ErrorResponse; -use crate::http::ui::databases::models::{ - DatabaseCreatePayload, DatabaseResponse, DatabasesResponse, -}; -use crate::http::ui::tests::common::{req, ui_test_op, Entity, Op}; -use crate::http::ui::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; -use crate::tests::run_test_server; -use embucket_metastore::VolumeType as MetastoreVolumeType; -use embucket_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; +use crate::databases::models::{DatabaseCreatePayload, DatabaseResponse, DatabasesResponse}; +use crate::error::ErrorResponse; +use crate::tests::common::{Entity, Op, req, ui_test_op}; +use crate::tests::server::run_test_server; +use crate::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; +use 
core_metastore::VolumeType as MetastoreVolumeType; +use core_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; use http::Method; #[tokio::test] diff --git a/crates/runtime/src/http/ui/tests/mod.rs b/crates/api-ui/src/tests/mod.rs similarity index 84% rename from crates/runtime/src/http/ui/tests/mod.rs rename to crates/api-ui/src/tests/mod.rs index 00d3fda4b..8678ea0cf 100644 --- a/crates/runtime/src/http/ui/tests/mod.rs +++ b/crates/api-ui/src/tests/mod.rs @@ -1,9 +1,11 @@ +pub mod auth; pub mod common; pub mod dashboard; pub mod databases; pub mod navigation_trees; pub mod queries; pub mod schemas; +pub mod server; pub mod tables; pub mod volumes; pub mod worksheets; diff --git a/crates/runtime/src/http/ui/tests/navigation_trees.rs b/crates/api-ui/src/tests/navigation_trees.rs similarity index 89% rename from crates/runtime/src/http/ui/tests/navigation_trees.rs rename to crates/api-ui/src/tests/navigation_trees.rs index 3561e7a70..b9b2c5429 100644 --- a/crates/runtime/src/http/ui/tests/navigation_trees.rs +++ b/crates/api-ui/src/tests/navigation_trees.rs @@ -1,16 +1,16 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::ui::databases::models::DatabaseCreatePayload; -use crate::http::ui::navigation_trees::models::NavigationTreesResponse; -use crate::http::ui::queries::models::QueryCreatePayload; -use crate::http::ui::schemas::models::SchemaCreatePayload; -use crate::http::ui::tests::common::req; -use crate::http::ui::tests::common::{ui_test_op, Entity, Op}; -use crate::http::ui::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; -use crate::http::ui::worksheets::models::{WorksheetCreatePayload, WorksheetResponse}; -use crate::tests::run_test_server; -use embucket_metastore::VolumeType as MetastoreVolumeType; -use embucket_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; +use crate::databases::models::DatabaseCreatePayload; +use crate::navigation_trees::models::NavigationTreesResponse; +use crate::queries::models::QueryCreatePayload; +use crate::schemas::models::SchemaCreatePayload; +use crate::tests::common::req; +use crate::tests::common::{Entity, Op, ui_test_op}; +use crate::tests::server::run_test_server; +use crate::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; +use crate::worksheets::models::{WorksheetCreatePayload, WorksheetResponse}; +use core_metastore::VolumeType as MetastoreVolumeType; +use core_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; use http::Method; use serde_json::json; diff --git a/crates/runtime/src/http/ui/tests/queries.rs b/crates/api-ui/src/tests/queries.rs similarity index 96% rename from crates/runtime/src/http/ui/tests/queries.rs rename to crates/api-ui/src/tests/queries.rs index 3c1ce42f6..7ab86cb1e 100644 --- a/crates/runtime/src/http/ui/tests/queries.rs +++ b/crates/api-ui/src/tests/queries.rs @@ -1,12 +1,12 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::error::ErrorResponse; -use crate::http::ui::queries::models::{ +use crate::error::ErrorResponse; +use crate::queries::models::{ Column, QueriesResponse, QueryCreatePayload, QueryRecord, QueryStatus, ResultSet, }; -use crate::http::ui::tests::common::http_req; -use crate::http::ui::worksheets::models::{Worksheet, WorksheetCreatePayload, WorksheetsResponse}; -use crate::tests::run_test_server; +use crate::tests::common::http_req; +use crate::tests::server::run_test_server; +use crate::worksheets::models::{Worksheet, WorksheetCreatePayload, 
WorksheetsResponse}; use http::Method; use serde_json::json; diff --git a/crates/runtime/src/http/ui/tests/schemas.rs b/crates/api-ui/src/tests/schemas.rs similarity index 95% rename from crates/runtime/src/http/ui/tests/schemas.rs rename to crates/api-ui/src/tests/schemas.rs index 02ea55752..1ed4e0e2a 100644 --- a/crates/runtime/src/http/ui/tests/schemas.rs +++ b/crates/api-ui/src/tests/schemas.rs @@ -1,11 +1,11 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::ui::databases::models::{Database, DatabaseCreatePayload}; -use crate::http::ui::schemas::models::{SchemaCreatePayload, SchemasResponse}; -use crate::http::ui::tests::common::{req, ui_test_op, Entity, Op}; -use crate::http::ui::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; -use crate::tests::run_test_server; -use embucket_metastore::{ +use crate::databases::models::{Database, DatabaseCreatePayload}; +use crate::schemas::models::{SchemaCreatePayload, SchemasResponse}; +use crate::tests::common::{Entity, Op, req, ui_test_op}; +use crate::tests::server::run_test_server; +use crate::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; +use core_metastore::{ Database as MetastoreDatabase, Volume as MetastoreVolume, VolumeType as MetastoreVolumeType, }; use http::Method; diff --git a/crates/api-ui/src/tests/server.rs b/crates/api-ui/src/tests/server.rs new file mode 100644 index 000000000..fcde5f960 --- /dev/null +++ b/crates/api-ui/src/tests/server.rs @@ -0,0 +1,106 @@ +use crate::auth::layer::require_auth; +use crate::auth::router as auth_router; +use crate::layers::make_cors_middleware; +use crate::router; +use crate::state; +use crate::{config::AuthConfig, config::WebConfig}; +use api_sessions::{RequestSessionMemory, RequestSessionStore}; +use axum::Router; +use axum::middleware; +use core_executor::service::CoreExecutionService; +use core_executor::utils::Config; +use core_history::RecordingExecutionService; +use core_history::store::SlateDBWorksheetsStore; +use core_metastore::SlateDBMetastore; +use core_utils::Db; +use std::net::SocketAddr; +use std::sync::Arc; +use time::Duration; +use tower_sessions::{Expiry, SessionManagerLayer}; + +#[allow(clippy::unwrap_used)] +pub async fn run_test_server_with_demo_auth( + jwt_secret: String, + demo_user: String, + demo_password: String, +) -> SocketAddr { + let listener = tokio::net::TcpListener::bind("0.0.0.0:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let db = Db::memory().await; + let metastore = Arc::new(SlateDBMetastore::new(db.clone())); + let history = Arc::new(SlateDBWorksheetsStore::new(db)); + let mut auth_config = AuthConfig::new(jwt_secret); + auth_config.with_demo_credentials(demo_user, demo_password); + + let app = make_app( + metastore, + history, + &WebConfig { + port: 3000, + host: "0.0.0.0".to_string(), + allow_origin: None, + }, + auth_config, + ) + .unwrap(); + + tokio::spawn(async move { + axum::serve(listener, app).await.unwrap(); + }); + + addr +} + +#[allow(clippy::unwrap_used)] +pub async fn run_test_server() -> SocketAddr { + run_test_server_with_demo_auth(String::new(), String::new(), String::new()).await +} + +#[allow(clippy::needless_pass_by_value)] +pub fn make_app( + metastore: Arc<SlateDBMetastore>, + history_store: Arc<SlateDBWorksheetsStore>, + config: &WebConfig, + auth_config: AuthConfig, +) -> Result<Router, Box<dyn std::error::Error>> { + let execution_cfg = Config::new("json")?; + let execution_svc = Arc::new(CoreExecutionService::new(metastore.clone(), execution_cfg)); + let execution_svc = Arc::new(RecordingExecutionService::new( 
execution_svc, + history_store.clone(), + )); + let session_memory = RequestSessionMemory::default(); + let session_store = RequestSessionStore::new(session_memory, execution_svc.clone()); + let session_layer = SessionManagerLayer::new(session_store) + .with_secure(false) + .with_expiry(Expiry::OnInactivity(Duration::seconds(5 * 60))); + + // Create the application state + let app_state = state::AppState::new( + metastore, + history_store, + execution_svc, + Arc::new(config.clone()), + Arc::new(auth_config), + ); + + let ui_router = router::create_router().with_state(app_state.clone()); + let ui_router = ui_router.layer(middleware::from_fn_with_state( + app_state.clone(), + require_auth, + )); + let mut router = Router::new() + .nest("/ui", ui_router) + .nest( + "/ui/auth", + auth_router::create_router().with_state(app_state), + ) + .layer(session_layer); + + if let Some(allow_origin) = config.allow_origin.as_ref() { + router = router.layer(make_cors_middleware(allow_origin)); + } + + Ok(router) +} diff --git a/crates/runtime/src/http/ui/tests/tables.rs b/crates/api-ui/src/tests/tables.rs similarity index 95% rename from crates/runtime/src/http/ui/tests/tables.rs rename to crates/api-ui/src/tests/tables.rs index 92f398a15..7a792de81 100644 --- a/crates/runtime/src/http/ui/tests/tables.rs +++ b/crates/api-ui/src/tests/tables.rs @@ -1,17 +1,17 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::ui::databases::models::DatabaseCreatePayload; -use crate::http::ui::queries::models::QueryCreatePayload; -use crate::http::ui::schemas::models::SchemaCreatePayload; -use crate::http::ui::tables::models::{ +use crate::databases::models::DatabaseCreatePayload; +use crate::queries::models::QueryCreatePayload; +use crate::schemas::models::SchemaCreatePayload; +use crate::tables::models::{ TableColumnsResponse, TablePreviewDataResponse, TableStatisticsResponse, TablesResponse, }; -use crate::http::ui::tests::common::{req, ui_test_op, Entity, Op}; -use crate::http::ui::volumes::models::{VolumeCreatePayload, VolumeCreateResponse}; -use crate::http::ui::worksheets::{WorksheetCreatePayload, WorksheetResponse}; -use crate::tests::run_test_server; -use embucket_metastore::VolumeType as MetastoreVolumeType; -use embucket_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; +use crate::tests::common::{Entity, Op, req, ui_test_op}; +use crate::tests::server::run_test_server; +use crate::volumes::models::{VolumeCreatePayload, VolumeCreateResponse}; +use crate::worksheets::{WorksheetCreatePayload, WorksheetResponse}; +use core_metastore::VolumeType as MetastoreVolumeType; +use core_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume}; use http::Method; use serde_json::json; diff --git a/crates/runtime/src/http/ui/tests/volumes.rs b/crates/api-ui/src/tests/volumes.rs similarity index 92% rename from crates/runtime/src/http/ui/tests/volumes.rs rename to crates/api-ui/src/tests/volumes.rs index c2feee909..1b2bd63f4 100644 --- a/crates/runtime/src/http/ui/tests/volumes.rs +++ b/crates/api-ui/src/tests/volumes.rs @@ -1,9 +1,9 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::ui::tests::common::{ui_test_op, Entity, Op}; -use crate::http::ui::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; -use crate::tests::run_test_server; -use embucket_metastore::Volume as MetastoreVolume; -use embucket_metastore::{ +use crate::tests::common::{Entity, Op, ui_test_op}; +use crate::tests::server::run_test_server; +use 
crate::volumes::models::{Volume, VolumeCreatePayload, VolumeCreateResponse}; +use core_metastore::Volume as MetastoreVolume; +use core_metastore::{ AwsAccessKeyCredentials, AwsCredentials, FileVolume as MetastoreFileVolume, S3Volume as MetastoreS3Volume, VolumeType as MetastoreVolumeType, }; diff --git a/crates/runtime/src/http/ui/tests/worksheets.rs b/crates/api-ui/src/tests/worksheets.rs similarity index 98% rename from crates/runtime/src/http/ui/tests/worksheets.rs rename to crates/api-ui/src/tests/worksheets.rs index 5dd19ae8f..cd5e72afb 100644 --- a/crates/runtime/src/http/ui/tests/worksheets.rs +++ b/crates/api-ui/src/tests/worksheets.rs @@ -1,12 +1,12 @@ #![allow(clippy::unwrap_used, clippy::expect_used)] -use crate::http::error::ErrorResponse; -use crate::http::ui::tests::common::http_req; -use crate::http::ui::worksheets::{ +use crate::error::ErrorResponse; +use crate::tests::common::http_req; +use crate::tests::server::run_test_server; +use crate::worksheets::{ SortBy, SortOrder, Worksheet, WorksheetCreatePayload, WorksheetUpdatePayload, WorksheetsResponse, }; -use crate::tests::run_test_server; use http::Method; use reqwest; use serde_json::json; diff --git a/crates/runtime/src/http/ui/volumes/error.rs b/crates/api-ui/src/volumes/error.rs similarity index 94% rename from crates/runtime/src/http/ui/volumes/error.rs rename to crates/api-ui/src/volumes/error.rs index d097fc593..e511b43a9 100644 --- a/crates/runtime/src/http/ui/volumes/error.rs +++ b/crates/api-ui/src/volumes/error.rs @@ -1,8 +1,8 @@ -use crate::http::error::ErrorResponse; -use crate::http::ui::error::IntoStatusCode; -use axum::response::IntoResponse; +use crate::error::ErrorResponse; +use crate::error::IntoStatusCode; use axum::Json; -use embucket_metastore::error::MetastoreError; +use axum::response::IntoResponse; +use core_metastore::error::MetastoreError; use http::StatusCode; use snafu::prelude::*; pub type VolumesResult<T> = Result<T, VolumesAPIError>; diff --git a/crates/runtime/src/http/ui/volumes/handlers.rs b/crates/api-ui/src/volumes/handlers.rs similarity index 94% rename from crates/runtime/src/http/ui/volumes/handlers.rs rename to crates/api-ui/src/volumes/handlers.rs index 3ec552fe3..057d788eb 100644 --- a/crates/runtime/src/http/ui/volumes/handlers.rs +++ b/crates/api-ui/src/volumes/handlers.rs @@ -1,21 +1,20 @@ -use crate::http::state::AppState; -use crate::http::ui::volumes::models::VolumesParameters; -use crate::http::{ +use crate::state::AppState; +use crate::volumes::models::VolumesParameters; +use crate::{ error::ErrorResponse, - metastore::handlers::QueryParameters, - ui::volumes::error::{VolumesAPIError, VolumesResult}, - ui::volumes::models::{ + volumes::error::{VolumesAPIError, VolumesResult}, + volumes::models::{ VolumeCreatePayload, VolumeCreateResponse, VolumeResponse, VolumeUpdatePayload, VolumeUpdateResponse, VolumesResponse, }, }; use axum::{ - extract::{Path, Query, State}, Json, + extract::{Path, Query, State}, }; -use embucket_metastore::error::MetastoreError; -use embucket_metastore::models::Volume as MetastoreVolume; -use embucket_utils::scan_iterator::ScanIterator; +use core_metastore::error::MetastoreError; +use core_metastore::models::Volume as MetastoreVolume; +use core_utils::scan_iterator::ScanIterator; use utoipa::OpenApi; use validator::Validate; @@ -42,6 +41,12 @@ use validator::Validate; )] pub struct ApiDoc; +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +pub struct QueryParameters { + #[serde(default)] + pub cascade: Option<bool>, +} + #[utoipa::path( post, operation_id = 
"createVolume", diff --git a/crates/runtime/src/http/ui/volumes/mod.rs b/crates/api-ui/src/volumes/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/volumes/mod.rs rename to crates/api-ui/src/volumes/mod.rs diff --git a/crates/runtime/src/http/ui/volumes/models.rs b/crates/api-ui/src/volumes/models.rs similarity index 97% rename from crates/runtime/src/http/ui/volumes/models.rs rename to crates/api-ui/src/volumes/models.rs index 55af4b9fe..e3aa1e8af 100644 --- a/crates/runtime/src/http/ui/volumes/models.rs +++ b/crates/api-ui/src/volumes/models.rs @@ -1,9 +1,9 @@ -use crate::http::ui::default_limit; -use embucket_metastore::models::{ +use crate::default_limit; +use core_metastore::S3TablesVolume as MetastoreS3TablesVolume; +use core_metastore::models::{ AwsCredentials, FileVolume as MetastoreFileVolume, S3Volume as MetastoreS3Volume, Volume as MetastoreVolume, VolumeType as MetastoreVolumeType, }; -use embucket_metastore::S3TablesVolume as MetastoreS3TablesVolume; use serde::{Deserialize, Serialize}; use utoipa::{IntoParams, ToSchema}; diff --git a/crates/runtime/src/http/web_assets/config.rs b/crates/api-ui/src/web_assets/config.rs similarity index 80% rename from crates/runtime/src/http/web_assets/config.rs rename to crates/api-ui/src/web_assets/config.rs index 63d7c252b..47537ecb4 100644 --- a/crates/runtime/src/http/web_assets/config.rs +++ b/crates/api-ui/src/web_assets/config.rs @@ -4,5 +4,4 @@ use serde::{Deserialize, Serialize}; pub struct StaticWebConfig { pub host: String, pub port: u16, - pub allow_origin: Option, } diff --git a/crates/runtime/src/http/web_assets/error.rs b/crates/api-ui/src/web_assets/error.rs similarity index 96% rename from crates/runtime/src/http/web_assets/error.rs rename to crates/api-ui/src/web_assets/error.rs index 862c850a3..67ae871c5 100644 --- a/crates/runtime/src/http/web_assets/error.rs +++ b/crates/api-ui/src/web_assets/error.rs @@ -1,6 +1,6 @@ -use crate::http::error::ErrorResponse; -use axum::response::IntoResponse; +use crate::error::ErrorResponse; use axum::Json; +use axum::response::IntoResponse; use http::Error; use http::StatusCode; use snafu::Snafu; diff --git a/crates/runtime/src/http/web_assets/handler.rs b/crates/api-ui/src/web_assets/handler.rs similarity index 98% rename from crates/runtime/src/http/web_assets/handler.rs rename to crates/api-ui/src/web_assets/handler.rs index 3d024aba3..5de176293 100644 --- a/crates/runtime/src/http/web_assets/handler.rs +++ b/crates/api-ui/src/web_assets/handler.rs @@ -5,7 +5,7 @@ use super::error::{ use axum::{ body::Body, extract::Path, - http::{header, StatusCode}, + http::{StatusCode, header}, response::{IntoResponse, Redirect, Response}, }; use mime_guess; diff --git a/crates/runtime/src/http/web_assets/mod.rs b/crates/api-ui/src/web_assets/mod.rs similarity index 76% rename from crates/runtime/src/http/web_assets/mod.rs rename to crates/api-ui/src/web_assets/mod.rs index 5a80ed5eb..d25e066a4 100644 --- a/crates/runtime/src/http/web_assets/mod.rs +++ b/crates/api-ui/src/web_assets/mod.rs @@ -2,5 +2,3 @@ pub mod config; pub mod error; pub mod handler; pub mod server; - -pub use server::*; diff --git a/crates/api-ui/src/web_assets/router.rs b/crates/api-ui/src/web_assets/router.rs new file mode 100644 index 000000000..3802282eb --- /dev/null +++ b/crates/api-ui/src/web_assets/router.rs @@ -0,0 +1,11 @@ +use crate::state::AppState; +use axum::Router; +use axum::routing::get; + +use crate::web_assets::handlers::{root_handler, tar_handler}; + +pub fn create_router() -> Router { + 
Router::new() + .route("/", get(root_handler)) + .route(format!("/{{*path}}").as_str(), get(tar_handler)) +} diff --git a/crates/api-ui/src/web_assets/server.rs b/crates/api-ui/src/web_assets/server.rs new file mode 100644 index 000000000..42804274b --- /dev/null +++ b/crates/api-ui/src/web_assets/server.rs @@ -0,0 +1,133 @@ +use super::handler::WEB_ASSETS_MOUNT_PATH; +use super::handler::{root_handler, tar_handler}; +use crate::config::StaticWebConfig; +use axum::{Router, routing::get}; +use core::net::SocketAddr; +use tower_http::trace::TraceLayer; + +// TODO: Refactor this: move wiring and serve logic to embucketd +// This layer should not bother with wiring and serve logic +#[allow(clippy::unwrap_used, clippy::as_conversions)] +pub async fn run_web_assets_server( + config: &StaticWebConfig, +) -> Result<SocketAddr, Box<dyn std::error::Error>> { + let StaticWebConfig { host, port } = config; + + let app = Router::new() + .route(WEB_ASSETS_MOUNT_PATH, get(root_handler)) + .route( + format!("{WEB_ASSETS_MOUNT_PATH}{{*path}}").as_str(), + get(tar_handler), + ) + .layer(TraceLayer::new_for_http()); + + // TODO: CORS settings are now handled by embucketd + // if let Some(allow_origin) = allow_origin.as_ref() { + // app = app.layer(make_cors_middleware(allow_origin)); + // } + + let listener = tokio::net::TcpListener::bind(format!("{host}:{port}")).await?; + let addr = listener.local_addr()?; + tracing::info!("Listening on http://{}", addr); + + tokio::spawn(async move { + axum::serve(listener, app) + // .with_graceful_shutdown(shutdown_signal()) + .await + }); + + Ok(addr) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::web_assets::config::StaticWebConfig; + use http::Method; + use reqwest; + use reqwest::header; + + #[allow(clippy::expect_used)] + #[tokio::test] + async fn test_web_assets_server() { + let addr = run_web_assets_server(&StaticWebConfig { + host: "0.0.0.0".to_string(), + port: 0, + }) + .await; + + assert!(addr.is_ok()); + + let client = reqwest::Client::new(); + let addr = addr.expect("Failed to run web assets server"); + let res = client + .request(Method::GET, format!("http://{addr}/index.html")) + .send() + .await + .expect("Failed to send request to web assets server"); + + assert_eq!(http::StatusCode::OK, res.status()); + + let content_length = res + .headers() + .get(header::CONTENT_LENGTH) + .expect("Content-Length header not found") + .to_str() + .expect("Failed to get str from Content-Length header") + .parse::<usize>() + .expect("Failed to parse Content-Length header"); + + assert!(content_length > 0); + } + + #[allow(clippy::expect_used)] + #[tokio::test] + async fn test_web_assets_server_redirect() { + let addr = run_web_assets_server(&StaticWebConfig { + host: "0.0.0.0".to_string(), + port: 0, + }) + .await; + + assert!(addr.is_ok()); + + let client = reqwest::Client::builder() + .redirect(reqwest::redirect::Policy::none()) + .build() + .expect("Failed to build client for redirect"); + + let addr = addr.expect("Failed to run web assets server"); + let res = client + .request(Method::GET, format!("http://{addr}/deadbeaf")) + .send() + .await + .expect("Failed to send request to web assets server"); + + assert_eq!(http::StatusCode::SEE_OTHER, res.status()); + + let redirect = res + .headers() + .get(header::LOCATION) + .expect("Location header not found") + .to_str() + .expect("Failed to get str from Location header"); + assert_eq!(redirect, "/index.html"); + + // redirect from root to index.html + let res = client + .request(Method::GET, format!("http://{addr}/")) + .send() + .await + 
.expect("Failed to send request to web assets server"); + + assert_eq!(http::StatusCode::SEE_OTHER, res.status()); + + let redirect = res + .headers() + .get(header::LOCATION) + .expect("Location header not found") + .to_str() + .expect("Failed to get str from Location header"); + assert_eq!(redirect, "/index.html"); + } +} diff --git a/crates/runtime/src/http/ui/worksheets/error.rs b/crates/api-ui/src/worksheets/error.rs similarity index 95% rename from crates/runtime/src/http/ui/worksheets/error.rs rename to crates/api-ui/src/worksheets/error.rs index 18d9602ea..23d79d173 100644 --- a/crates/runtime/src/http/ui/worksheets/error.rs +++ b/crates/api-ui/src/worksheets/error.rs @@ -1,8 +1,8 @@ -use crate::http::error::ErrorResponse; -use crate::http::ui::error::IntoStatusCode; -use axum::response::IntoResponse; +use crate::error::ErrorResponse; +use crate::error::IntoStatusCode; use axum::Json; -use embucket_history::history_store::WorksheetsStoreError; +use axum::response::IntoResponse; +use core_history::history_store::WorksheetsStoreError; use http::status::StatusCode; use snafu::prelude::*; diff --git a/crates/runtime/src/http/ui/worksheets/handlers.rs b/crates/api-ui/src/worksheets/handlers.rs similarity index 97% rename from crates/runtime/src/http/ui/worksheets/handlers.rs rename to crates/api-ui/src/worksheets/handlers.rs index 52851b2a9..a92cb9b6f 100644 --- a/crates/runtime/src/http/ui/worksheets/handlers.rs +++ b/crates/api-ui/src/worksheets/handlers.rs @@ -1,16 +1,16 @@ -use crate::http::error::ErrorResponse; -use crate::http::state::AppState; -use crate::http::ui::worksheets::{ - error::{WorksheetUpdateError, WorksheetsAPIError, WorksheetsResult}, +use crate::error::ErrorResponse; +use crate::state::AppState; +use crate::worksheets::{ GetWorksheetsParams, SortBy, SortOrder, Worksheet, WorksheetCreatePayload, WorksheetCreateResponse, WorksheetResponse, WorksheetUpdatePayload, WorksheetsResponse, + error::{WorksheetUpdateError, WorksheetsAPIError, WorksheetsResult}, }; use axum::{ - extract::{Path, Query, State}, Json, + extract::{Path, Query, State}, }; use chrono::Utc; -use embucket_history::WorksheetId; +use core_history::WorksheetId; use std::convert::From; use tracing; use utoipa::OpenApi; @@ -149,7 +149,7 @@ pub async fn create_worksheet( payload.name }; - let history_worksheet = embucket_history::Worksheet::new(name, payload.content); + let history_worksheet = core_history::Worksheet::new(name, payload.content); let worksheet = state .history_store diff --git a/crates/runtime/src/http/ui/worksheets/mod.rs b/crates/api-ui/src/worksheets/mod.rs similarity index 100% rename from crates/runtime/src/http/ui/worksheets/mod.rs rename to crates/api-ui/src/worksheets/mod.rs diff --git a/crates/runtime/src/http/ui/worksheets/models.rs b/crates/api-ui/src/worksheets/models.rs similarity index 91% rename from crates/runtime/src/http/ui/worksheets/models.rs rename to crates/api-ui/src/worksheets/models.rs index 51c067013..115f42a8b 100644 --- a/crates/runtime/src/http/ui/worksheets/models.rs +++ b/crates/api-ui/src/worksheets/models.rs @@ -1,5 +1,5 @@ use chrono::{DateTime, Utc}; -use embucket_history::WorksheetId; +use core_history::WorksheetId; use serde::{Deserialize, Serialize}; use std::fmt; use std::fmt::Display; @@ -31,8 +31,8 @@ pub struct Worksheet { pub updated_at: DateTime, } -impl From for Worksheet { - fn from(worksheet: embucket_history::Worksheet) -> Self { +impl From for Worksheet { + fn from(worksheet: core_history::Worksheet) -> Self { Self { id: worksheet.id, name: 
worksheet.name, @@ -44,9 +44,9 @@ impl From<embucket_history::Worksheet> for Worksheet { } #[allow(clippy::from_over_into)] -impl Into<embucket_history::Worksheet> for Worksheet { - fn into(self) -> embucket_history::Worksheet { - embucket_history::Worksheet { +impl Into<core_history::Worksheet> for Worksheet { + fn into(self) -> core_history::Worksheet { + core_history::Worksheet { id: self.id, name: self.name, content: self.content, diff --git a/crates/core-executor/Cargo.toml b/crates/core-executor/Cargo.toml new file mode 100644 index 000000000..b723aec5d --- /dev/null +++ b/crates/core-executor/Cargo.toml @@ -0,0 +1,53 @@ +[package] +name = "core-executor" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +core-utils = { path = "../core-utils" } +core-metastore = { path = "../core-metastore" } +df-builtins = { path = "../df-builtins" } +df-catalog = { path = "../df-catalog" } + +async-trait = { workspace = true } +aws-config = { workspace = true } +aws-credential-types = { workspace = true } +bytes = { workspace = true } +chrono = { workspace = true } +datafusion = { workspace = true } +datafusion-common = { workspace = true } +datafusion-doc = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-functions-json = { workspace = true } +datafusion-physical-plan = { workspace = true } +datafusion_iceberg = { workspace = true } +futures = { workspace = true } +sqlparser = { git = "https://github.com/Embucket/datafusion-sqlparser-rs.git", rev = "ed416548dcfe4a73a3240bbf625fb9010a4925c8", features = [ + "visitor", +] } +iceberg-rust = { workspace = true } +iceberg-s3tables-catalog = { workspace = true } + +object_store = { workspace = true } +regex = { workspace = true } +snafu = { workspace = true } +serde = { workspace = true } +serde_json = { workspace = true } + +strum = { workspace = true } +tracing = { workspace = true } +tracing-attributes = { workspace = true } +tokio = { workspace = true } +tokio-stream = "0.1.17" + +url = { workspace = true } +uuid = { workspace = true } + +[dev-dependencies] +bytes = { workspace = true } +insta = { version = "1.42.0", features = ["yaml", "filters"] } +paste = "1" + +[lints] +workspace = true diff --git a/crates/runtime/src/execution/datafusion/analyzer.rs b/crates/core-executor/src/datafusion/analyzer.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/analyzer.rs rename to crates/core-executor/src/datafusion/analyzer.rs diff --git a/crates/runtime/src/execution/datafusion/error.rs b/crates/core-executor/src/datafusion/error.rs similarity index 95% rename from crates/runtime/src/execution/datafusion/error.rs rename to crates/core-executor/src/datafusion/error.rs index dc43f2e1c..978cd56af 100644 --- a/crates/runtime/src/execution/datafusion/error.rs +++ b/crates/core-executor/src/datafusion/error.rs @@ -5,7 +5,9 @@ use snafu::prelude::*; #[snafu(visibility(pub(crate)))] pub enum SQLError { #[snafu(display("Arrow error: {source}"))] - Arrow { source: arrow::error::ArrowError }, + Arrow { + source: datafusion::arrow::error::ArrowError, + }, #[snafu(display("DataFusion error: {source}"))] DataFusion { source: DataFusionError }, diff --git a/crates/runtime/src/execution/datafusion/mod.rs b/crates/core-executor/src/datafusion/mod.rs similarity index 78% rename from crates/runtime/src/execution/datafusion/mod.rs rename to crates/core-executor/src/datafusion/mod.rs index 453af9f53..de8cb8546 100644 --- a/crates/runtime/src/execution/datafusion/mod.rs +++ b/crates/core-executor/src/datafusion/mod.rs @@ -2,7 +2,8 @@ //pub mod error; pub mod analyzer; pub 
mod error; -pub mod functions; pub mod planner; pub mod type_planner; pub mod visitors; + +pub use df_builtins as functions; diff --git a/crates/runtime/src/execution/datafusion/planner.rs b/crates/core-executor/src/datafusion/planner.rs similarity index 98% rename from crates/runtime/src/execution/datafusion/planner.rs rename to crates/core-executor/src/datafusion/planner.rs index 587099e39..e4a611d52 100644 --- a/crates/runtime/src/execution/datafusion/planner.rs +++ b/crates/core-executor/src/datafusion/planner.rs @@ -1,12 +1,12 @@ use datafusion::arrow::datatypes::{Field, Schema}; use datafusion::common::Result; -use datafusion::common::{plan_err, ToDFSchema}; +use datafusion::common::{ToDFSchema, plan_err}; use datafusion::logical_expr::sqlparser::ast::{Ident, ObjectName}; use datafusion::logical_expr::{CreateMemoryTable, DdlStatement, EmptyRelation, LogicalPlan}; use datafusion::sql::parser::{DFParser, Statement as DFStatement}; use datafusion::sql::planner::{ - object_name_to_table_reference, ContextProvider, IdentNormalizer, ParserOptions, - PlannerContext, SqlToRel, + ContextProvider, IdentNormalizer, ParserOptions, PlannerContext, SqlToRel, + object_name_to_table_reference, }; use datafusion::sql::sqlparser::ast::{ ColumnDef as SQLColumnDef, ColumnOption, CreateTable as CreateTableStatement, diff --git a/crates/runtime/src/execution/datafusion/type_planner.rs b/crates/core-executor/src/datafusion/type_planner.rs similarity index 98% rename from crates/runtime/src/execution/datafusion/type_planner.rs rename to crates/core-executor/src/datafusion/type_planner.rs index 55bcb5387..175824e7c 100644 --- a/crates/runtime/src/execution/datafusion/type_planner.rs +++ b/crates/core-executor/src/datafusion/type_planner.rs @@ -4,7 +4,7 @@ use datafusion::logical_expr::planner::TypePlanner; use datafusion::logical_expr::sqlparser::ast; use datafusion::sql::sqlparser::ast::DataType as SQLDataType; use datafusion::sql::utils::make_decimal_type; -use datafusion_common::{not_impl_err, DataFusionError}; +use datafusion_common::{DataFusionError, not_impl_err}; #[derive(Debug)] pub struct CustomTypePlanner {} diff --git a/crates/runtime/src/execution/datafusion/visitors/functions_rewriter.rs b/crates/core-executor/src/datafusion/visitors/functions_rewriter.rs similarity index 62% rename from crates/runtime/src/execution/datafusion/visitors/functions_rewriter.rs rename to crates/core-executor/src/datafusion/visitors/functions_rewriter.rs index 5e43ee357..2e3f62f02 100644 --- a/crates/runtime/src/execution/datafusion/visitors/functions_rewriter.rs +++ b/crates/core-executor/src/datafusion/visitors/functions_rewriter.rs @@ -1,37 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
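
The `CustomTypePlanner` moved above plugs into DataFusion's `TypePlanner` extension point (the trait this file already imports), which lets a dialect map SQL type names onto Arrow types before logical planning. A minimal sketch of that pattern, assuming the usual `plan_type` signature; the `Datetime` mapping is a hypothetical illustration, not this project's actual rules:

```rust
use datafusion::arrow::datatypes::{DataType, TimeUnit};
use datafusion::common::Result;
use datafusion::logical_expr::planner::TypePlanner;
use datafusion::logical_expr::sqlparser::ast;

#[derive(Debug)]
struct ExampleTypePlanner;

impl TypePlanner for ExampleTypePlanner {
    fn plan_type(&self, sql_type: &ast::DataType) -> Result<Option<DataType>> {
        match sql_type {
            // Map a dialect-specific SQL type onto an Arrow type...
            ast::DataType::Datetime(_) => {
                Ok(Some(DataType::Timestamp(TimeUnit::Microsecond, None)))
            }
            // ...and return None to fall back to DataFusion's defaults.
            _ => Ok(None),
        }
    }
}
```
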
- -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - use datafusion_expr::sqlparser::ast::VisitMut; use datafusion_expr::sqlparser::ast::{ Expr, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, Ident, ObjectName, @@ -46,7 +12,7 @@ impl VisitorMut for FunctionsRewriter { type Break = (); fn post_visit_expr(&mut self, expr: &mut Expr) -> std::ops::ControlFlow { - if let Expr::Function(ref mut func) = expr { + if let Expr::Function(func) = expr { let func_name_string = func.name.clone().to_string().to_lowercase(); let func_name = func_name_string.as_str(); let args = &mut func.args; diff --git a/crates/runtime/src/execution/datafusion/visitors/json_element.rs b/crates/core-executor/src/datafusion/visitors/json_element.rs similarity index 67% rename from crates/runtime/src/execution/datafusion/visitors/json_element.rs rename to crates/core-executor/src/datafusion/visitors/json_element.rs index de6600017..29f6a6f56 100644 --- a/crates/runtime/src/execution/datafusion/visitors/json_element.rs +++ b/crates/core-executor/src/datafusion/visitors/json_element.rs @@ -1,37 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
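
The `FunctionsRewriter` above follows sqlparser's mutable-visitor pattern, which the workspace enables via the `visitor` feature. A self-contained sketch of the same technique — a hypothetical rewriter renaming `nvl` to `coalesce`; with the sqlparser revision pinned in this workspace, object names are built from `ObjectNamePart`s:

```rust
use std::ops::ControlFlow;
use datafusion_expr::sqlparser::ast::{
    Expr, Ident, ObjectName, ObjectNamePart, VisitMut, VisitorMut,
};
use datafusion_expr::sqlparser::dialect::GenericDialect;
use datafusion_expr::sqlparser::parser::Parser;

struct RenameNvl;

impl VisitorMut for RenameNvl {
    type Break = ();

    fn post_visit_expr(&mut self, expr: &mut Expr) -> ControlFlow<Self::Break> {
        if let Expr::Function(func) = expr {
            if func.name.to_string().eq_ignore_ascii_case("nvl") {
                func.name =
                    ObjectName(vec![ObjectNamePart::Identifier(Ident::new("coalesce"))]);
            }
        }
        ControlFlow::Continue(())
    }
}

fn main() {
    let mut statements =
        Parser::parse_sql(&GenericDialect {}, "SELECT nvl(a, 0) FROM t").unwrap();
    for stmt in &mut statements {
        // VisitMut walks the AST, calling the visitor on every node.
        let _ = stmt.visit(&mut RenameNvl);
    }
    assert_eq!(statements[0].to_string(), "SELECT coalesce(a, 0) FROM t");
}
```
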
- use datafusion_expr::sqlparser::ast::VisitMut; use datafusion_expr::sqlparser::ast::{ Expr as ASTExpr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, diff --git a/crates/core-executor/src/datafusion/visitors/mod.rs b/crates/core-executor/src/datafusion/visitors/mod.rs new file mode 100644 index 000000000..5087388c8 --- /dev/null +++ b/crates/core-executor/src/datafusion/visitors/mod.rs @@ -0,0 +1,4 @@ +//pub mod analyzer; +//pub mod error; +pub mod functions_rewriter; +pub mod json_element; diff --git a/crates/runtime/src/execution/dedicated_executor.rs b/crates/core-executor/src/dedicated_executor.rs similarity index 97% rename from crates/runtime/src/execution/dedicated_executor.rs rename to crates/core-executor/src/dedicated_executor.rs index 8149be530..77ab2b411 100644 --- a/crates/runtime/src/execution/dedicated_executor.rs +++ b/crates/core-executor/src/dedicated_executor.rs @@ -11,13 +11,13 @@ use async_trait::async_trait; use datafusion::physical_plan::stream::RecordBatchStreamAdapter; -use datafusion_common::error::GenericError; use datafusion_common::DataFusionError; +use datafusion_common::error::GenericError; use datafusion_physical_plan::SendableRecordBatchStream; use futures::stream::BoxStream; use futures::{ - future::{BoxFuture, Shared}, Future, FutureExt, Stream, StreamExt, TryFutureExt, + future::{BoxFuture, Shared}, }; use object_store::path::Path; use object_store::{ @@ -33,7 +33,7 @@ use tokio::runtime::Builder; use tokio::task::JoinHandle; use tokio::{ runtime::Handle, - sync::{oneshot::error::RecvError, Notify}, + sync::{Notify, oneshot::error::RecvError}, task::JoinSet, }; use tokio_stream::wrappers::ReceiverStream; @@ -277,28 +277,23 @@ impl DedicatedExecutor { // make a copy to send any job error results back let error_tx = tx.clone(); - // This task will run on the CPU runtime - let task = self.spawn(async move { - // drive the stream forward on the CPU runtime, sending results - // back to the original (presumably IO) runtime - let mut stream = Box::pin(stream); - while let Some(result) = stream.next().await { - // try to send to the sender, if error means the - // receiver has been closed and we terminate early - if tx.send(result).await.is_err() { - return; - } - } - }); - - // fire up a task on the current runtime which transfers results back - // from the CPU runtime to the calling runtime + // run the stream on the CPU runtime and forward errors/results back + let executor = self.clone(); let mut set = JoinSet::new(); set.spawn(async move { - if let Err(e) = task.await { - // error running task, try and report it back. An error sending - // means the receiver was dropped so there is nowhere to - // report errors. 
Thus ignored via ok() + // schedule on CPU runtime and await completion + if let Err(e) = executor + .spawn(async move { + let mut stream = Box::pin(stream); + while let Some(result) = stream.next().await { + if tx.send(result).await.is_err() { + return; + } + } + }) + .await + { + // error running task, report back if possible error_tx.send(Err(converter(e))).await.ok(); } }); @@ -946,9 +941,9 @@ mod tests { #[tokio::test] async fn basic_clone() { let barrier = Arc::new(Barrier::new(2)); - let exec = exec(); + let exec = exec().clone(); // Run task on clone should work fine - let dedicated_task = exec.clone().spawn(do_work(42, Arc::clone(&barrier))); + let dedicated_task = exec.spawn(do_work(42, Arc::clone(&barrier))); barrier.wait(); assert_eq!(dedicated_task.await.unwrap(), 42); @@ -1071,13 +1066,7 @@ mod tests { #[tokio::test] async fn panic_on_executor_other() { let exec = exec(); - let dedicated_task = exec.spawn(async move { - if true { - panic_any(1) - } else { - 42 - } - }); + let dedicated_task = exec.spawn(async move { if true { panic_any(1) } else { 42 } }); // should not be able to get the result let err = dedicated_task.await.unwrap_err(); diff --git a/crates/runtime/src/execution/error.rs b/crates/core-executor/src/error.rs similarity index 87% rename from crates/runtime/src/execution/error.rs rename to crates/core-executor/src/error.rs index 9df4e9dfb..309bdc486 100644 --- a/crates/runtime/src/execution/error.rs +++ b/crates/core-executor/src/error.rs @@ -1,6 +1,7 @@ use std::backtrace::Backtrace; use datafusion_common::DataFusionError; +use df_catalog::error::Error as CatalogError; use iceberg_rust::error::Error as IcebergError; use iceberg_s3tables_catalog::error::Error as S3tablesError; use snafu::prelude::*; @@ -30,7 +31,9 @@ pub enum ExecutionError { InvalidBucketIdentifier { ident: String }, #[snafu(display("Arrow error: {source}"))] - Arrow { source: arrow::error::ArrowError }, + Arrow { + source: datafusion::arrow::error::ArrowError, + }, #[snafu(display("No Table Provider found for table: {table_name}"))] TableProviderNotFound { table_name: String }, @@ -49,7 +52,7 @@ pub enum ExecutionError { #[snafu(display("Metastore error: {source}"))] Metastore { - source: embucket_metastore::error::MetastoreError, + source: core_metastore::error::MetastoreError, }, #[snafu(display("Database {db} not found"))] @@ -73,8 +76,8 @@ pub enum ExecutionError { #[snafu(display("Unsupported file format {format}"))] UnsupportedFileFormat { format: String }, - #[snafu(display("Cannot refresh catalog list"))] - RefreshCatalogList { message: String }, + #[snafu(display("Cannot refresh catalog list: {source}"))] + RefreshCatalogList { source: CatalogError }, #[snafu(display("Catalog {catalog} cannot be downcasted"))] CatalogDownCast { catalog: String }, @@ -93,7 +96,7 @@ pub enum ExecutionError { #[snafu(display("Threaded Job error: {source}: {backtrace}"))] JobError { - source: crate::execution::dedicated_executor::JobError, + source: crate::dedicated_executor::JobError, backtrace: Backtrace, }, @@ -103,8 +106,8 @@ pub enum ExecutionError { #[snafu(display("CatalogList failed"))] CatalogListDowncast, - #[snafu(display("Failed to register catalog {catalog}"))] - RegisterCatalog { catalog: String }, + #[snafu(display("Failed to register catalog: {source}"))] + RegisterCatalog { source: CatalogError }, } pub type ExecutionResult = std::result::Result; diff --git a/crates/runtime/src/execution/mod.rs b/crates/core-executor/src/lib.rs similarity index 79% rename from 
crates/runtime/src/execution/mod.rs rename to crates/core-executor/src/lib.rs index 3bb54d087..87fc06253 100644 --- a/crates/runtime/src/execution/mod.rs +++ b/crates/core-executor/src/lib.rs @@ -1,4 +1,4 @@ -pub mod catalog; +pub use df_catalog as catalog; pub mod datafusion; pub mod dedicated_executor; pub mod error; @@ -8,6 +8,5 @@ pub mod service; pub mod session; pub mod utils; -pub mod recording_service; #[cfg(test)] pub mod tests; diff --git a/crates/runtime/src/execution/models.rs b/crates/core-executor/src/models.rs similarity index 98% rename from crates/runtime/src/execution/models.rs rename to crates/core-executor/src/models.rs index 78af331ba..1b69fded7 100644 --- a/crates/runtime/src/execution/models.rs +++ b/crates/core-executor/src/models.rs @@ -3,6 +3,8 @@ use datafusion::arrow::datatypes::{DataType, Field, TimeUnit}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; +// TODO: We should not have serde dependency here +// Instead it should be in api-snowflake-rest #[derive(Debug, Serialize, Deserialize, Clone)] pub struct ColumnInfo { pub name: String, diff --git a/crates/runtime/src/execution/query.rs b/crates/core-executor/src/query.rs similarity index 98% rename from crates/runtime/src/execution/query.rs rename to crates/core-executor/src/query.rs index 2501e470e..c3bcb4854 100644 --- a/crates/runtime/src/execution/query.rs +++ b/crates/core-executor/src/query.rs @@ -1,5 +1,10 @@ use super::catalog::information_schema::information_schema::{ - InformationSchemaProvider, INFORMATION_SCHEMA, + INFORMATION_SCHEMA, InformationSchemaProvider, +}; +use core_metastore::{ + Metastore, SchemaIdent as MetastoreSchemaIdent, + TableCreateRequest as MetastoreTableCreateRequest, TableFormat as MetastoreTableFormat, + TableIdent as MetastoreTableIdent, }; use datafusion::arrow::array::{Int64Array, RecordBatch}; use datafusion::arrow::datatypes::{DataType, Field, Schema as ArrowSchema}; @@ -8,7 +13,7 @@ use datafusion::catalog::{CatalogProvider, SchemaProvider}; use datafusion::datasource::default_table_source::provider_as_source; use datafusion::execution::session_state::SessionContextProvider; use datafusion::execution::session_state::SessionState; -use datafusion::logical_expr::{sqlparser::ast::Insert, LogicalPlan, TableSource}; +use datafusion::logical_expr::{LogicalPlan, TableSource, sqlparser::ast::Insert}; use datafusion::prelude::CsvReadOptions; use datafusion::sql::parser::{CreateExternalTable, DFParser, Statement as DFStatement}; use datafusion::sql::sqlparser::ast::{ @@ -16,18 +21,13 @@ use datafusion::sql::sqlparser::ast::{ TableFactor, TableObject, TableWithJoins, }; use datafusion_common::{ - plan_datafusion_err, DataFusionError, ResolvedTableReference, TableReference, + DataFusionError, ResolvedTableReference, TableReference, plan_datafusion_err, }; use datafusion_expr::logical_plan::dml::{DmlStatement, InsertOp, WriteOp}; use datafusion_expr::{CreateMemoryTable, DdlStatement}; use datafusion_iceberg::catalog::catalog::IcebergCatalog; -use embucket_metastore::{ - Metastore, SchemaIdent as MetastoreSchemaIdent, - TableCreateRequest as MetastoreTableCreateRequest, TableFormat as MetastoreTableFormat, - TableIdent as MetastoreTableIdent, -}; -use iceberg_rust::catalog::create::CreateTableBuilder; use iceberg_rust::catalog::Catalog; +use iceberg_rust::catalog::create::CreateTableBuilder; use iceberg_rust::spec::arrow::schema::new_fields_with_ids; use iceberg_rust::spec::namespace::Namespace; use iceberg_rust::spec::schema::Schema; @@ -40,8 +40,8 @@ use 
sqlparser::ast::{
    BinaryOperator, GroupByExpr, MergeAction, MergeClauseKind, MergeInsertKind, ObjectNamePart,
    ObjectType, Query as AstQuery, Select, SelectItem, UpdateTableFromKind, Use,
};
-use std::collections::hash_map::Entry;
 use std::collections::HashMap;
+use std::collections::hash_map::Entry;
 use std::sync::Arc;
 use url::Url;
@@ -49,19 +49,19 @@ use super::catalog::{
     catalog_list::EmbucketCatalogList, catalogs::embucket::catalog::EmbucketCatalog,
 };
 use super::datafusion::planner::ExtendedSqlToRel;
-use super::error::{self as ex_error, ExecutionError, ExecutionResult};
+use super::error::{self as ex_error, ExecutionError, ExecutionResult, RefreshCatalogListSnafu};
 use super::session::UserSession;
-use super::utils::{is_logical_plan_effectively_empty, NormalizedIdent};
-use crate::execution::catalog::catalog::CachingCatalog;
-use crate::execution::datafusion::visitors::{functions_rewriter, json_element};
-use embucket_history::WorksheetId;
+use super::utils::{NormalizedIdent, is_logical_plan_effectively_empty};
+use crate::datafusion::visitors::{functions_rewriter, json_element};
+use df_catalog::catalog::CachingCatalog;
 use tracing_attributes::instrument;
 
 #[derive(Default, Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
 pub struct QueryContext {
     pub database: Option<String>,
     pub schema: Option<String>,
-    pub worksheet_id: Option<WorksheetId>,
+    // TODO: Remove this
+    pub worksheet_id: Option<i64>,
 }
 
 impl QueryContext {
@@ -69,7 +69,7 @@
     pub const fn new(
         database: Option<String>,
         schema: Option<String>,
-        worksheet_id: Option<WorksheetId>,
+        worksheet_id: Option<i64>,
     ) -> Self {
         Self {
             database,
@@ -145,12 +145,12 @@ impl UserQuery {
             .as_any()
             .downcast_ref::<EmbucketCatalogList>()
         {
-            catalog_list_impl.refresh().await
-        } else {
-            Err(ExecutionError::RefreshCatalogList {
-                message: "Catalog list implementation is not castable".to_string(),
-            })
+            catalog_list_impl
+                .refresh()
+                .await
+                .context(RefreshCatalogListSnafu)?;
         }
+        Ok(())
     }
 
     #[allow(clippy::unwrap_used)]
@@ -387,7 +387,7 @@
             return match result {
                 Ok(_) => status_response(),
                 Err(err) => Err(err),
-            }
+            };
         }
     };
@@ -536,7 +536,7 @@
             return match result {
                 Ok(_) => created_entity_response(),
                 Err(err) => Err(err),
-            }
+            };
         }
     };
@@ -695,7 +695,7 @@
             _ => {
                 return Err(ExecutionError::InvalidTableIdentifier {
                     ident: name.to_string(),
-                })
+                });
             }
         };
@@ -887,8 +887,9 @@
                 }
             }
         }
-        let select_query =
-            format!("SELECT {values} FROM {source_query} LEFT JOIN {target_table} {target_alias} ON {on}{where_clause_str}");
+        let select_query = format!(
+            "SELECT {values} FROM {source_query} LEFT JOIN {target_table} {target_alias} ON {on}{where_clause_str}"
+        );
 
         // Construct the INSERT statement
         let insert_query = format!("INSERT INTO {target_table} ({columns}) {select_query}");
@@ -930,7 +931,7 @@
             return match result {
                 Ok(_) => created_entity_response(),
                 Err(err) => Err(err),
-            }
+            };
         }
     };
@@ -1496,7 +1497,7 @@
                     "Table functions are not supported in INSERT statements"
                         .to_string(),
                 ),
-            })
+            });
         }
     };
@@ -1814,11 +1815,10 @@ pub fn created_entity_response() -> ExecutionResult<Vec<RecordBatch>> {
         DataType::Int64,
         false,
     )]));
-    Ok(vec![RecordBatch::try_new(
-        schema,
-        vec![Arc::new(Int64Array::from(vec![0]))],
-    )
-    .context(ex_error::ArrowSnafu)?])
+    Ok(vec![
+        RecordBatch::try_new(schema, vec![Arc::new(Int64Array::from(vec![0]))])
+            .context(ex_error::ArrowSnafu)?,
+    ])
 }
 
 pub fn status_response() -> ExecutionResult<Vec<RecordBatch>> {
diff --git a/crates/runtime/src/execution/service.rs b/crates/core-executor/src/service.rs
similarity index
98% rename from crates/runtime/src/execution/service.rs rename to crates/core-executor/src/service.rs index 1cce35ae1..9f4ee3187 100644 --- a/crates/runtime/src/execution/service.rs +++ b/crates/core-executor/src/service.rs @@ -3,8 +3,8 @@ use std::{collections::HashMap, sync::Arc}; use bytes::{Buf, Bytes}; use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::csv::reader::Format; use datafusion::arrow::csv::ReaderBuilder; +use datafusion::arrow::csv::reader::Format; use datafusion::catalog::CatalogProvider; use datafusion::catalog::{MemoryCatalogProvider, MemorySchemaProvider}; use datafusion::datasource::memory::MemTable; @@ -15,9 +15,9 @@ use super::{ models::ColumnInfo, query::QueryContext, session::UserSession, - utils::{convert_record_batches, Config}, + utils::{Config, convert_record_batches}, }; -use embucket_metastore::{Metastore, TableIdent as MetastoreTableIdent}; +use core_metastore::{Metastore, TableIdent as MetastoreTableIdent}; use tokio::sync::RwLock; use uuid::Uuid; diff --git a/crates/runtime/src/execution/session.rs b/crates/core-executor/src/session.rs similarity index 67% rename from crates/runtime/src/execution/session.rs rename to crates/core-executor/src/session.rs index 982e7a4ed..5758bfaa1 100644 --- a/crates/runtime/src/execution/session.rs +++ b/crates/core-executor/src/session.rs @@ -1,28 +1,31 @@ //use super::datafusion::functions::geospatial::register_udfs as register_geo_udfs; -use super::datafusion::functions::aggregate::register_udafs; use super::datafusion::functions::register_udfs; use super::datafusion::type_planner::CustomTypePlanner; use super::dedicated_executor::DedicatedExecutor; -use super::error::{self as ex_error, ExecutionError, ExecutionResult}; +use super::error::{ + self as ex_error, ExecutionError, ExecutionResult, RefreshCatalogListSnafu, + RegisterCatalogSnafu, +}; use super::query::{QueryContext, UserQuery}; -use crate::execution::catalog::catalog_list::{EmbucketCatalogList, DEFAULT_CATALOG}; -use crate::execution::datafusion::analyzer::IcebergTypesAnalyzer; +use crate::datafusion::analyzer::IcebergTypesAnalyzer; use aws_config::{BehaviorVersion, Region, SdkConfig}; -use aws_credential_types::provider::SharedCredentialsProvider; use aws_credential_types::Credentials; +use aws_credential_types::provider::SharedCredentialsProvider; +use core_metastore::error::MetastoreError; +use core_metastore::{AwsCredentials, Metastore, VolumeType as MetastoreVolumeType}; +use core_utils::scan_iterator::ScanIterator; use datafusion::catalog::CatalogProvider; use datafusion::common::error::Result as DFResult; -use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::execution::SessionStateBuilder; +use datafusion::execution::runtime_env::RuntimeEnvBuilder; use datafusion::prelude::{SessionConfig, SessionContext}; use datafusion::sql::planner::IdentNormalizer; use datafusion_common::config::{ConfigEntry, ConfigExtension, ExtensionOptions}; use datafusion_functions_json::register_all as register_json_udfs; use datafusion_iceberg::catalog::catalog::IcebergCatalog as DataFusionIcebergCatalog; use datafusion_iceberg::planner::IcebergQueryPlanner; -use embucket_metastore::error::MetastoreError; -use embucket_metastore::{AwsCredentials, Metastore, VolumeType as MetastoreVolumeType}; -use embucket_utils::scan_iterator::ScanIterator; +use df_builtins::register_udafs; +use df_catalog::catalog_list::{DEFAULT_CATALOG, EmbucketCatalogList}; // TODO: We need to fix this after geodatafusion is updated to datafusion 47 //use 
geodatafusion::udf::native::register_native as register_geo_native; use iceberg_rust::object_store::ObjectStoreBuilder; @@ -76,8 +79,14 @@ impl UserSession { //register_geo_native(&ctx); //register_geo_udfs(&ctx); - catalog_list_impl.register_catalogs().await?; - catalog_list_impl.refresh().await?; + catalog_list_impl + .register_catalogs() + .await + .context(RegisterCatalogSnafu)?; + catalog_list_impl + .refresh() + .await + .context(RefreshCatalogListSnafu)?; let enable_ident_normalization = ctx.enable_ident_normalization(); let session = Self { @@ -241,3 +250,106 @@ impl ExtensionOptions for SessionParams { .collect() } } + +#[cfg(test)] +mod tests { + use super::*; + use std::sync::Arc; + + use core_metastore::{ + Database as MetastoreDatabase, Metastore, Schema as MetastoreSchema, + SchemaIdent as MetastoreSchemaIdent, SlateDBMetastore, Volume as MetastoreVolume, + }; + + use crate::{query::QueryContext, session::UserSession}; + + #[tokio::test] + #[allow(clippy::expect_used, clippy::manual_let_else, clippy::too_many_lines)] + async fn test_create_table_and_insert() { + let metastore = SlateDBMetastore::new_in_memory().await; + metastore + .create_volume( + &"test_volume".to_string(), + MetastoreVolume::new( + "test_volume".to_string(), + core_metastore::VolumeType::Memory, + ), + ) + .await + .expect("Failed to create volume"); + metastore + .create_database( + &"benchmark".to_string(), + MetastoreDatabase { + ident: "benchmark".to_string(), + properties: None, + volume: "test_volume".to_string(), + }, + ) + .await + .expect("Failed to create database"); + let schema_ident = MetastoreSchemaIdent { + database: "benchmark".to_string(), + schema: "public".to_string(), + }; + metastore + .create_schema( + &schema_ident.clone(), + MetastoreSchema { + ident: schema_ident, + properties: None, + }, + ) + .await + .expect("Failed to create schema"); + let session = Arc::new( + UserSession::new(metastore) + .await + .expect("Failed to create user session"), + ); + let create_query = r" + CREATE TABLE benchmark.public.hits + ( + WatchID BIGINT NOT NULL, + JavaEnable INTEGER NOT NULL, + Title TEXT NOT NULL, + GoodEvent INTEGER NOT NULL, + EventTime BIGINT NOT NULL, + EventDate INTEGER NOT NULL, + CounterID INTEGER NOT NULL, + ClientIP INTEGER NOT NULL, + PRIMARY KEY (CounterID, EventDate, EventTime, WatchID) + ); + "; + let mut query1 = session.query(create_query, QueryContext::default()); + + let statement = query1.parse_query().expect("Failed to parse query"); + let result = query1.execute().await.expect("Failed to execute query"); + + let all_query = session + .query("SHOW TABLES", QueryContext::default()) + .execute() + .await + .expect("Failed to execute query"); + + let insert_query = session + .query( + "INSERT INTO benchmark.public.hits VALUES (1, 1, 'test', 1, 1, 1, 1, 1)", + QueryContext::default(), + ) + .execute() + .await + .expect("Failed to execute query"); + + let select_query = session + .query( + "SELECT * FROM benchmark.public.hits", + QueryContext::default(), + ) + .execute() + .await + .expect("Failed to execute query"); + + insta::assert_debug_snapshot!((statement, result, all_query, insert_query, select_query)); + } +} diff --git a/crates/runtime/src/tests/snapshots/embucket_runtime__tests__session__create_table_and_insert.snap b/crates/core-executor/src/snapshots/core_executor__session__tests__create_table_and_insert.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/embucket_runtime__tests__session__create_table_and_insert.snap rename to 
crates/core-executor/src/snapshots/core_executor__session__tests__create_table_and_insert.snap diff --git a/crates/runtime/src/execution/tests/mod.rs b/crates/core-executor/src/tests/mod.rs similarity index 100% rename from crates/runtime/src/execution/tests/mod.rs rename to crates/core-executor/src/tests/mod.rs diff --git a/crates/runtime/src/execution/tests/query.rs b/crates/core-executor/src/tests/query.rs similarity index 80% rename from crates/runtime/src/execution/tests/query.rs rename to crates/core-executor/src/tests/query.rs index bb48ee714..c218b84b7 100644 --- a/crates/runtime/src/execution/tests/query.rs +++ b/crates/core-executor/src/tests/query.rs @@ -1,20 +1,20 @@ -use crate::execution::query::{QueryContext, UserQuery}; -use crate::execution::session::UserSession; +use crate::query::{QueryContext, UserQuery}; +use crate::session::UserSession; -use crate::execution::error::{ExecutionError, ExecutionResult}; -use crate::execution::service::{CoreExecutionService, ExecutionService}; -use crate::execution::utils::{Config, DataSerializationFormat}; -use crate::SlateDBMetastore; +use crate::error::{ExecutionError, ExecutionResult}; +use crate::service::{CoreExecutionService, ExecutionService}; +use crate::utils::{Config, DataSerializationFormat}; +use core_metastore::Metastore; +use core_metastore::SlateDBMetastore; +use core_metastore::{ + Database as MetastoreDatabase, Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent, + TableIdent as MetastoreTableIdent, Volume as MetastoreVolume, +}; use datafusion::assert_batches_eq; use datafusion::sql::parser::{DFParser, Statement as DFStatement}; -use datafusion::sql::sqlparser::ast::visit_expressions; use datafusion::sql::sqlparser::ast::Statement as SQLStatement; +use datafusion::sql::sqlparser::ast::visit_expressions; use datafusion::sql::sqlparser::ast::{Expr, ObjectName, ObjectNamePart}; -use embucket_metastore::Metastore; -use embucket_metastore::{ - Database as MetastoreDatabase, Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent, - TableIdent as MetastoreTableIdent, Volume as MetastoreVolume, -}; use sqlparser::ast::{ Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, Ident, }; @@ -299,7 +299,10 @@ async fn test_create_table_with_timestamp_nanosecond() { table: "target_table".to_string(), }; // Verify that the file was uploaded successfully by running select * from the table - let query = format!("CREATE TABLE {}.{}.{} (id INT, ts TIMESTAMP_NTZ(9)) as VALUES (1, '2025-04-09T21:11:23'), (2, '2025-04-09T21:11:00');", table_ident.database, table_ident.schema, table_ident.table); + let query = format!( + "CREATE TABLE {}.{}.{} (id INT, ts TIMESTAMP_NTZ(9)) as VALUES (1, '2025-04-09T21:11:23'), (2, '2025-04-09T21:11:00');", + table_ident.database, table_ident.schema, table_ident.table + ); let (rows, _) = execution_svc .query(&session_id, &query, QueryContext::default()) .await @@ -489,7 +492,7 @@ async fn prepare_env() -> (CoreExecutionService, Arc, String) &"test_volume".to_string(), MetastoreVolume::new( "test_volume".to_string(), - embucket_metastore::VolumeType::Memory, + core_metastore::VolumeType::Memory, ), ) .await @@ -534,3 +537,111 @@ async fn prepare_env() -> (CoreExecutionService, Arc, String) .expect("Failed to create session"); (execution_svc, metastore, session_id.to_string()) } + +static TABLE_SETUP: &str = include_str!(r"./table_setup.sql"); + +#[allow(clippy::unwrap_used, clippy::expect_used)] +pub async fn create_df_session() -> Arc { + let metastore = 
SlateDBMetastore::new_in_memory().await; + metastore + .create_volume( + &"test_volume".to_string(), + MetastoreVolume::new( + "test_volume".to_string(), + core_metastore::VolumeType::Memory, + ), + ) + .await + .expect("Failed to create volume"); + metastore + .create_database( + &"embucket".to_string(), + MetastoreDatabase { + ident: "embucket".to_string(), + properties: None, + volume: "test_volume".to_string(), + }, + ) + .await + .expect("Failed to create database"); + let schema_ident = MetastoreSchemaIdent { + database: "embucket".to_string(), + schema: "public".to_string(), + }; + metastore + .create_schema( + &schema_ident.clone(), + MetastoreSchema { + ident: schema_ident, + properties: None, + }, + ) + .await + .expect("Failed to create schema"); + + let user_session = Arc::new( + UserSession::new(metastore) + .await + .expect("Failed to create user session"), + ); + + for query in TABLE_SETUP.split(';') { + if !query.is_empty() { + let mut query = user_session.query(query, QueryContext::default()); + query.execute().await.unwrap(); + //ctx.sql(query).await.unwrap().collect().await.unwrap(); + } + } + user_session +} + +macro_rules! test_query { + ($test_fn_name:ident, $query:expr) => { + paste::paste! { + #[tokio::test] + async fn [< query_ $test_fn_name >]() { + let ctx = create_df_session().await; + + let mut query = ctx.query($query, crate::query::QueryContext::default()); + let res = query.execute().await; + insta::with_settings!({ + description => stringify!($query), + omit_expression => true, + prepend_module_to_snapshot => false + }, { + let df = match res { + Ok(record_batches) => { + Ok(datafusion::arrow::util::pretty::pretty_format_batches(&record_batches).unwrap().to_string()) + }, + Err(e) => Err(format!("Error: {e}")) + }; + let df = df.map(|df| df.split("\n").map(|s| s.to_string()).collect::>()); + insta::assert_debug_snapshot!((df)); + }) + } + } + } + } + +test_query!(select_date_add_diff, "SELECT dateadd(day, 5, '2025-06-01')"); +test_query!(func_date_add, "SELECT date_add(day, 30, '2025-01-06')"); +// // SELECT +test_query!(select_star, "SELECT * FROM employee_table"); +// FIXME: ILIKE is not supported yet +// test_query!(select_ilike, "SELECT * ILIKE '%id%' FROM employee_table;"); +test_query!( + select_exclude, + "SELECT * EXCLUDE department_id FROM employee_table;" +); +test_query!( + select_exclude_multiple, + "SELECT * EXCLUDE (department_id, employee_id) FROM employee_table;" +); + +test_query!( + qualify, + "SELECT product_id, retail_price, quantity, city + FROM sales + QUALIFY ROW_NUMBER() OVER (PARTITION BY city ORDER BY retail_price) = 1 + ;" +); diff --git a/crates/runtime/src/execution/tests/service.rs b/crates/core-executor/src/tests/service.rs similarity index 53% rename from crates/runtime/src/execution/tests/service.rs rename to crates/core-executor/src/tests/service.rs index 928057de0..ff1e7cd59 100644 --- a/crates/runtime/src/execution/tests/service.rs +++ b/crates/core-executor/src/tests/service.rs @@ -1,18 +1,14 @@ -use crate::execution::query::QueryContext; -use crate::execution::recording_service::RecordingExecutionService; -use crate::execution::service::{CoreExecutionService, ExecutionService}; -use crate::execution::utils::{Config, DataSerializationFormat}; -use crate::SlateDBMetastore; -use datafusion::{arrow::csv::reader::Format, assert_batches_eq}; -use embucket_history::{GetQueries, SlateDBWorksheetsStore, Worksheet, WorksheetsStore}; -use embucket_metastore::models::table::TableIdent as MetastoreTableIdent; -use 
embucket_metastore::Metastore; -use embucket_metastore::{ +use crate::query::QueryContext; +use crate::service::{CoreExecutionService, ExecutionService}; +use crate::utils::{Config, DataSerializationFormat}; +use core_metastore::Metastore; +use core_metastore::SlateDBMetastore; +use core_metastore::models::table::TableIdent as MetastoreTableIdent; +use core_metastore::{ Database as MetastoreDatabase, Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent, Volume as MetastoreVolume, }; -use embucket_utils::Db; -use std::sync::Arc; +use datafusion::{arrow::csv::reader::Format, assert_batches_eq}; #[tokio::test] #[allow(clippy::expect_used)] @@ -54,7 +50,7 @@ async fn test_service_upload_file() { &"test_volume".to_string(), MetastoreVolume::new( "test_volume".to_string(), - embucket_metastore::VolumeType::Memory, + core_metastore::VolumeType::Memory, ), ) .await @@ -185,7 +181,7 @@ async fn test_service_create_table_file_volume() { &"test_volume".to_string(), MetastoreVolume::new( "test_volume".to_string(), - embucket_metastore::VolumeType::File(embucket_metastore::FileVolume { + core_metastore::VolumeType::File(core_metastore::FileVolume { path: temp_path.to_string(), }), ), @@ -235,7 +231,9 @@ async fn test_service_create_table_file_volume() { .await .expect("Failed to create session"); - let create_table_sql = format!("CREATE TABLE {table_ident} (id INT, name STRING, value FLOAT) as VALUES (1, 'test1', 100.0), (2, 'test2', 200.0), (3, 'test3', 300.0)"); + let create_table_sql = format!( + "CREATE TABLE {table_ident} (id INT, name STRING, value FLOAT) as VALUES (1, 'test1', 100.0), (2, 'test2', 200.0), (3, 'test3', 300.0)" + ); let (res, _) = execution_svc .query(session_id, &create_table_sql, QueryContext::default()) .await @@ -252,7 +250,9 @@ async fn test_service_create_table_file_volume() { &res ); - let insert_sql = format!("INSERT INTO {table_ident} (id, name, value) VALUES (4, 'test4', 400.0), (5, 'test5', 500.0)"); + let insert_sql = format!( + "INSERT INTO {table_ident} (id, name, value) VALUES (4, 'test4', 400.0), (5, 'test5', 500.0)" + ); let (res, _) = execution_svc .query(session_id, &insert_sql, QueryContext::default()) .await @@ -269,211 +269,3 @@ async fn test_service_create_table_file_volume() { &res ); } - -#[tokio::test] -#[allow(clippy::expect_used, clippy::too_many_lines)] -async fn test_recording_service() { - let db = Db::memory().await; - let metastore = Arc::new(SlateDBMetastore::new(db.clone())); - let history_store = Arc::new(SlateDBWorksheetsStore::new(db)); - let execution_svc = Arc::new(CoreExecutionService::new( - metastore.clone(), - Config { - dbt_serialization_format: DataSerializationFormat::Json, - }, - )); - let execution_svc = - RecordingExecutionService::new(execution_svc.clone(), history_store.clone()); - - metastore - .create_volume( - &"test_volume".to_string(), - MetastoreVolume::new( - "test_volume".to_string(), - embucket_metastore::VolumeType::Memory, - ), - ) - .await - .expect("Failed to create volume"); - - let database_name = "embucket".to_string(); - - metastore - .create_database( - &database_name.clone(), - MetastoreDatabase { - ident: "embucket".to_string(), - properties: None, - volume: "test_volume".to_string(), - }, - ) - .await - .expect("Failed to create database"); - - let session_id = "test_session_id"; - execution_svc - .create_session(session_id.to_string()) - .await - .expect("Failed to create session"); - - let schema_name = "public".to_string(); - - let context = QueryContext::new(Some(database_name.clone()), 
Some(schema_name.clone()), None); - - //Good query - execution_svc - .query( - session_id, - format!( - "CREATE SCHEMA {}.{}", - database_name.clone(), - schema_name.clone() - ) - .as_str(), - context.clone(), - ) - .await - .expect("Failed to add schema"); - - assert_eq!( - 1, - history_store - .get_queries(GetQueries::default()) - .await - .expect("Failed to get queries") - .len() - ); - - //Failing query - execution_svc - .query( - session_id, - format!( - "CREATE SCHEMA {}.{}", - database_name.clone(), - schema_name.clone() - ) - .as_str(), - context.clone(), - ) - .await - .expect_err("Failed to not add schema"); - - assert_eq!( - 2, - history_store - .get_queries(GetQueries::default()) - .await - .expect("Failed to get queries") - .len() - ); - - let table_name = "test1".to_string(); - - //Create table queries - execution_svc - .query( - session_id, - format!( - "create TABLE {}.{}.{} - external_volume = '' - catalog = '' - base_location = '' - ( - APP_ID TEXT, - PLATFORM TEXT, - EVENT TEXT, - TXN_ID NUMBER(38,0), - EVENT_TIME TEXT - );", - database_name.clone(), - schema_name.clone(), - table_name.clone() - ) - .as_str(), - context.clone(), - ) - .await - .expect("Failed to create table"); - - assert_eq!( - 3, - history_store - .get_queries(GetQueries::default()) - .await - .expect("Failed to get queries") - .len() - ); - - //Insert into query - execution_svc - .query( - session_id, - format!( - "INSERT INTO {}.{}.{} (APP_ID, PLATFORM, EVENT, TXN_ID, EVENT_TIME) - VALUES ('12345', 'iOS', 'login', '123456', '2021-01-01T00:00:00'), - ('67890', 'Android', 'purchase', '456789', '2021-01-01T00:02:00')", - database_name.clone(), - schema_name.clone(), - table_name.clone() - ) - .as_str(), - context.clone(), - ) - .await - .expect("Failed to insert into"); - - assert_eq!( - 4, - history_store - .get_queries(GetQueries::default()) - .await - .expect("Failed to get queries") - .len() - ); - - //With worksheet - let worksheet = history_store - .add_worksheet(Worksheet::new("Testing1".to_string(), String::new())) - .await - .expect("Failed to add worksheet"); - - assert_eq!( - 0, - history_store - .get_queries(GetQueries::default().with_worksheet_id(worksheet.clone().id)) - .await - .expect("Failed to get queries") - .len() - ); - - execution_svc - .query( - session_id, - format!( - "INSERT INTO {}.{}.{} (APP_ID, PLATFORM, EVENT, TXN_ID, EVENT_TIME) - VALUES ('1234', 'iOS', 'login', '123456', '2021-01-01T00:00:00'), - ('6789', 'Android', 'purchase', '456789', '2021-01-01T00:02:00')", - database_name.clone(), - schema_name.clone(), - table_name.clone() - ) - .as_str(), - QueryContext::new( - Some(database_name.clone()), - Some(schema_name.clone()), - Some(worksheet.clone().id), - ), - ) - .await - .expect("Failed to insert into"); - - assert_eq!( - 1, - history_store - .get_queries(GetQueries::default().with_worksheet_id(worksheet.clone().id)) - .await - .expect("Failed to get queries") - .len() - ); -} diff --git a/crates/runtime/src/execution/tests/snapshots/embucket_runtime__execution__tests__query__context_name_injection.snap b/crates/core-executor/src/tests/snapshots/core_executor__tests__query__context_name_injection.snap similarity index 100% rename from crates/runtime/src/execution/tests/snapshots/embucket_runtime__execution__tests__query__context_name_injection.snap rename to crates/core-executor/src/tests/snapshots/core_executor__tests__query__context_name_injection.snap diff --git a/crates/runtime/src/tests/snapshots/query_func_date_add.snap 
b/crates/core-executor/src/tests/snapshots/query_func_date_add.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/query_func_date_add.snap rename to crates/core-executor/src/tests/snapshots/query_func_date_add.snap diff --git a/crates/runtime/src/tests/snapshots/query_qualify.snap b/crates/core-executor/src/tests/snapshots/query_qualify.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/query_qualify.snap rename to crates/core-executor/src/tests/snapshots/query_qualify.snap diff --git a/crates/runtime/src/tests/snapshots/query_select_date_add_diff.snap b/crates/core-executor/src/tests/snapshots/query_select_date_add_diff.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/query_select_date_add_diff.snap rename to crates/core-executor/src/tests/snapshots/query_select_date_add_diff.snap diff --git a/crates/runtime/src/tests/snapshots/query_select_exclude.snap b/crates/core-executor/src/tests/snapshots/query_select_exclude.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/query_select_exclude.snap rename to crates/core-executor/src/tests/snapshots/query_select_exclude.snap diff --git a/crates/runtime/src/tests/snapshots/query_select_exclude_multiple.snap b/crates/core-executor/src/tests/snapshots/query_select_exclude_multiple.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/query_select_exclude_multiple.snap rename to crates/core-executor/src/tests/snapshots/query_select_exclude_multiple.snap diff --git a/crates/runtime/src/tests/snapshots/query_select_star.snap b/crates/core-executor/src/tests/snapshots/query_select_star.snap similarity index 100% rename from crates/runtime/src/tests/snapshots/query_select_star.snap rename to crates/core-executor/src/tests/snapshots/query_select_star.snap diff --git a/crates/runtime/src/tests/queries/table_setup.sql b/crates/core-executor/src/tests/table_setup.sql similarity index 100% rename from crates/runtime/src/tests/queries/table_setup.sql rename to crates/core-executor/src/tests/table_setup.sql diff --git a/crates/runtime/src/execution/utils.rs b/crates/core-executor/src/utils.rs similarity index 95% rename from crates/runtime/src/execution/utils.rs rename to crates/core-executor/src/utils.rs index 7ca3f2016..401fd89bc 100644 --- a/crates/runtime/src/execution/utils.rs +++ b/crates/core-executor/src/utils.rs @@ -1,27 +1,27 @@ use super::models::ColumnInfo; use chrono::DateTime; +use core_metastore::SchemaIdent as MetastoreSchemaIdent; +use core_metastore::TableIdent as MetastoreTableIdent; use datafusion::arrow::array::ArrayRef; use datafusion::arrow::array::{ Array, Decimal128Array, Int16Array, Int32Array, Int64Array, StringArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray, - TimestampSecondArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, UnionArray, + TimestampSecondArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, UnionArray, }; use datafusion::arrow::compute::cast; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::datatypes::{Field, Schema, TimeUnit}; use datafusion::arrow::record_batch::RecordBatch; use datafusion::common::Result as DataFusionResult; -use datafusion_common::ScalarValue; +use datafusion::common::ScalarValue; use datafusion_expr::{Expr, LogicalPlan}; -use embucket_metastore::SchemaIdent as MetastoreSchemaIdent; -use embucket_metastore::TableIdent as MetastoreTableIdent; use sqlparser::ast::{Ident, ObjectName}; use std::collections::HashMap; use 
std::sync::Arc; use strum::{Display, EnumString}; // This isn't the best way to do this, but it'll do for now -// TODO: Revisit +// TODO: Should be moved to api-snowflake-rest pub struct Config { pub dbt_serialization_format: DataSerializationFormat, } @@ -670,21 +670,23 @@ mod tests { #[test] fn test_convert_record_batches_uint() { - let record_batches = vec![RecordBatch::try_new( - Arc::new(Schema::new(vec![ - Field::new("row_num_uint64", DataType::UInt64, false), - Field::new("row_num_uint32", DataType::UInt32, false), - Field::new("row_num_uint16", DataType::UInt16, false), - Field::new("row_num_uint8", DataType::UInt8, false), - ])), - vec![ - Arc::new(UInt64Array::from(vec![0, 1, u64::MAX])), - Arc::new(UInt32Array::from(vec![0, 1, u32::MAX])), - Arc::new(UInt16Array::from(vec![0, 1, u16::MAX])), - Arc::new(UInt8Array::from(vec![0, 1, u8::MAX])), - ], - ) - .unwrap()]; + let record_batches = vec![ + RecordBatch::try_new( + Arc::new(Schema::new(vec![ + Field::new("row_num_uint64", DataType::UInt64, false), + Field::new("row_num_uint32", DataType::UInt32, false), + Field::new("row_num_uint16", DataType::UInt16, false), + Field::new("row_num_uint8", DataType::UInt8, false), + ])), + vec![ + Arc::new(UInt64Array::from(vec![0, 1, u64::MAX])), + Arc::new(UInt32Array::from(vec![0, 1, u32::MAX])), + Arc::new(UInt16Array::from(vec![0, 1, u16::MAX])), + Arc::new(UInt8Array::from(vec![0, 1, u8::MAX])), + ], + ) + .unwrap(), + ]; let (converted_batches, column_infos) = convert_record_batches(record_batches.clone(), DataSerializationFormat::Arrow).unwrap(); diff --git a/crates/history/Cargo.toml b/crates/core-history/Cargo.toml similarity index 66% rename from crates/history/Cargo.toml rename to crates/core-history/Cargo.toml index 62ef85908..20ab9e5ac 100644 --- a/crates/history/Cargo.toml +++ b/crates/core-history/Cargo.toml @@ -1,13 +1,18 @@ [package] -name = "embucket_history" +name = "core-history" version = "0.1.0" -edition = "2021" -license-file = { workspace = true } +edition = "2024" +license-file.workspace = true [dependencies] -embucket_utils = { path = "../utils" } +core-utils = { path = "../core-utils" } +core-executor = { path = "../core-executor" } +core-metastore = { path = "../core-metastore" } + bytes = { workspace = true } +datafusion = { workspace = true } chrono = { workspace = true, features = ["serde"] } +indexmap = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } utoipa = { workspace = true } diff --git a/crates/history/src/entities/mod.rs b/crates/core-history/src/entities/mod.rs similarity index 100% rename from crates/history/src/entities/mod.rs rename to crates/core-history/src/entities/mod.rs diff --git a/crates/history/src/entities/query.rs b/crates/core-history/src/entities/query.rs similarity index 98% rename from crates/history/src/entities/query.rs rename to crates/core-history/src/entities/query.rs index dbba73cef..19f8330b4 100644 --- a/crates/history/src/entities/query.rs +++ b/crates/core-history/src/entities/query.rs @@ -1,7 +1,7 @@ use crate::WorksheetId; use bytes::Bytes; use chrono::{DateTime, Utc}; -use embucket_utils::iterable::IterableEntity; +use core_utils::iterable::IterableEntity; #[cfg(test)] use mockall::automock; use serde::{Deserialize, Serialize}; diff --git a/crates/history/src/entities/worksheet.rs b/crates/core-history/src/entities/worksheet.rs similarity index 97% rename from crates/history/src/entities/worksheet.rs rename to crates/core-history/src/entities/worksheet.rs index 96598d047..3de6f2cb6 100644 
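
The `convert_record_batches` helper exercised by the test above takes the batches by value together with a target `DataSerializationFormat` and returns the converted batches plus `ColumnInfo` metadata. A rough usage sketch; the exact post-conversion column types depend on the chosen format, so only the call shape shown in the test is assumed:

```rust
use std::sync::Arc;
use datafusion::arrow::array::UInt64Array;
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::record_batch::RecordBatch;

// Assumes the core-executor crate is in scope, as in the tests above.
use core_executor::utils::{convert_record_batches, DataSerializationFormat};

fn example() {
    let batch = RecordBatch::try_new(
        Arc::new(Schema::new(vec![Field::new("n", DataType::UInt64, false)])),
        vec![Arc::new(UInt64Array::from(vec![0, 1, u64::MAX]))],
    )
    .unwrap();

    // Json drives the dbt/Snowflake-style wire format; Arrow keeps native types.
    let (converted, column_infos) =
        convert_record_batches(vec![batch], DataSerializationFormat::Json).unwrap();
    assert_eq!(converted.len(), 1);
    assert_eq!(column_infos.len(), 1);
}
```
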
--- a/crates/history/src/entities/worksheet.rs +++ b/crates/core-history/src/entities/worksheet.rs @@ -1,6 +1,6 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; -use embucket_utils::iterable::IterableEntity; +use core_utils::iterable::IterableEntity; use serde::{Deserialize, Serialize}; pub type WorksheetId = i64; diff --git a/crates/history/src/entities/worksheet_query_ref.rs b/crates/core-history/src/entities/worksheet_query_ref.rs similarity index 96% rename from crates/history/src/entities/worksheet_query_ref.rs rename to crates/core-history/src/entities/worksheet_query_ref.rs index d7516b0f7..9485a92a1 100644 --- a/crates/history/src/entities/worksheet_query_ref.rs +++ b/crates/core-history/src/entities/worksheet_query_ref.rs @@ -1,6 +1,6 @@ use crate::{QueryRecordId, WorksheetId}; use bytes::Bytes; -use embucket_utils::iterable::IterableEntity; +use core_utils::iterable::IterableEntity; use serde::{Deserialize, Serialize}; // QueryRecordReference struct is used for referencing QueryRecord from worksheet. diff --git a/crates/history/src/history_store.rs b/crates/core-history/src/history_store.rs similarity index 96% rename from crates/history/src/history_store.rs rename to crates/core-history/src/history_store.rs index 6baca5c8c..834c00261 100644 --- a/crates/history/src/history_store.rs +++ b/crates/core-history/src/history_store.rs @@ -3,8 +3,8 @@ use crate::{ WorksheetId, }; use async_trait::async_trait; -use embucket_utils::iterable::IterableCursor; -use embucket_utils::{Db, Error}; +use core_utils::iterable::IterableCursor; +use core_utils::{Db, Error}; use futures::future::join_all; use serde_json::de; use slatedb::DbIterator; @@ -18,28 +18,28 @@ pub enum WorksheetsStoreError { BadKey { source: std::str::Utf8Error }, #[snafu(display("Error adding worksheet: {source}"))] - WorksheetAdd { source: embucket_utils::Error }, + WorksheetAdd { source: core_utils::Error }, #[snafu(display("Error getting worksheet: {source}"))] - WorksheetGet { source: embucket_utils::Error }, + WorksheetGet { source: core_utils::Error }, #[snafu(display("Error getting worksheets: {source}"))] - WorksheetsList { source: embucket_utils::Error }, + WorksheetsList { source: core_utils::Error }, #[snafu(display("Error deleting worksheet: {source}"))] - WorksheetDelete { source: embucket_utils::Error }, + WorksheetDelete { source: core_utils::Error }, #[snafu(display("Error updating worksheet: {source}"))] - WorksheetUpdate { source: embucket_utils::Error }, + WorksheetUpdate { source: core_utils::Error }, #[snafu(display("Error adding query record: {source}"))] - QueryAdd { source: embucket_utils::Error }, + QueryAdd { source: core_utils::Error }, #[snafu(display("Error adding query record reference: {source}"))] - QueryReferenceAdd { source: embucket_utils::Error }, + QueryReferenceAdd { source: core_utils::Error }, #[snafu(display("Error getting query history: {source}"))] - QueryGet { source: embucket_utils::Error }, + QueryGet { source: core_utils::Error }, #[snafu(display("Can't locate worksheet by key: {message}"))] WorksheetNotFound { message: String }, @@ -294,7 +294,7 @@ mod tests { use super::*; use crate::*; use chrono::{Duration, TimeZone, Utc}; - use embucket_utils::iterable::{IterableCursor, IterableEntity}; + use core_utils::iterable::{IterableCursor, IterableEntity}; use tokio; fn create_query_records(templates: &[(Option, QueryStatus)]) -> Vec { diff --git a/crates/history/src/lib.rs b/crates/core-history/src/lib.rs similarity index 60% rename from crates/history/src/lib.rs rename to 
crates/core-history/src/lib.rs
index b6135c37f..f15fb009d 100644
--- a/crates/history/src/lib.rs
+++ b/crates/core-history/src/lib.rs
@@ -1,7 +1,9 @@
 pub mod entities;
 pub mod history_store;
+pub mod recording_service;
 pub mod store;
 
 pub use entities::*;
 pub use history_store::*;
+pub use recording_service::RecordingExecutionService;
 pub use store::*;
diff --git a/crates/core-history/src/recording_service.rs b/crates/core-history/src/recording_service.rs
new file mode 100644
index 000000000..7c7ca8449
--- /dev/null
+++ b/crates/core-history/src/recording_service.rs
@@ -0,0 +1,397 @@
+use crate::{QueryRecord, QueryRecordActions, WorksheetsStore};
+use bytes::Bytes;
+use core_executor::{
+    error::ExecutionResult, models::ColumnInfo, query::QueryContext, service::ExecutionService,
+    session::UserSession, utils::Config,
+};
+use core_metastore::TableIdent as MetastoreTableIdent;
+use datafusion::arrow::array::RecordBatch;
+use datafusion::arrow::csv::reader::Format;
+use datafusion::arrow::json::{WriterBuilder, writer::JsonArray};
+use indexmap::IndexMap;
+use serde::{Deserialize, Serialize};
+use serde_json::Value;
+use snafu::ResultExt;
+use snafu::prelude::*;
+use std::sync::Arc;
+use utoipa::ToSchema;
+
+//
+// TODO: This module is pending a rewrite
+// TODO: simplify the query function
+// Is it possible to relax the dependency on the executor so that the history crate
+// does not depend on the executor crate?
+pub struct RecordingExecutionService {
+    pub execution: Arc<dyn ExecutionService>,
+    pub store: Arc<dyn WorksheetsStore>,
+}
+
+//TODO: add tests
+impl RecordingExecutionService {
+    pub fn new(execution: Arc<dyn ExecutionService>, store: Arc<dyn WorksheetsStore>) -> Self {
+        Self { execution, store }
+    }
+}
+
+#[async_trait::async_trait]
+impl ExecutionService for RecordingExecutionService {
+    async fn create_session(&self, session_id: String) -> ExecutionResult<Arc<UserSession>> {
+        self.execution.create_session(session_id).await
+    }
+
+    async fn delete_session(&self, session_id: String) -> ExecutionResult<()> {
+        self.execution.delete_session(session_id).await
+    }
+
+    async fn query(
+        &self,
+        session_id: &str,
+        query: &str,
+        query_context: QueryContext,
+    ) -> ExecutionResult<(Vec<RecordBatch>, Vec<ColumnInfo>)> {
+        let mut query_record = QueryRecord::query_start(query, query_context.worksheet_id);
+        let query_res = self.execution.query(session_id, query, query_context).await;
+        match query_res {
+            Ok((ref records, ref columns)) => {
+                let result_set = ResultSet::query_result_to_result_set(records, columns);
+                match result_set {
+                    Ok(result_set) => match serde_json::to_string(&result_set) {
+                        Ok(encoded_res) => {
+                            let result_count = i64::try_from(records.len()).unwrap_or(0);
+                            query_record.query_finished(result_count, Some(encoded_res));
+                        }
+                        // failed to wrap query results
+                        Err(err) => {
+                            query_record.query_finished_with_error(err.to_string());
+                        }
+                    },
+                    // error getting result_set
+                    Err(err) => {
+                        query_record.query_finished_with_error(err.to_string());
+                    }
+                }
+            }
+            // query execution error
+            Err(ref err) => {
+                query_record.query_finished_with_error(err.to_string());
+            }
+        }
+        // add the query record; do not fail the query if recording fails, just log it
+        if let Err(err) = self.store.add_query(&query_record).await {
+            tracing::error!("{err}");
+        }
+        query_res
+    }
+
+    async fn upload_data_to_table(
+        &self,
+        session_id: &str,
+        table_ident: &MetastoreTableIdent,
+        data: Bytes,
+        file_name: &str,
+        format: Format,
+    ) -> ExecutionResult {
+        self.execution
+            .upload_data_to_table(session_id, table_ident, data, file_name, format)
+            .await
+    }
+
+    fn config(&self) -> &Config {
+        self.execution.config()
+    }
+}
+
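
`ResultSet::query_result_to_result_set` below relies on arrow's JSON writer to turn `RecordBatch`es into an array of JSON objects, with explicit nulls so every column appears in every row. The round trip in isolation, as a minimal sketch using only the arrow APIs this file already imports:

```rust
use std::sync::Arc;
use datafusion::arrow::array::{Int64Array, RecordBatch, StringArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::json::{writer::JsonArray, WriterBuilder};

fn main() {
    let batch = RecordBatch::try_new(
        Arc::new(Schema::new(vec![
            Field::new("id", DataType::Int64, false),
            Field::new("name", DataType::Utf8, true),
        ])),
        vec![
            Arc::new(Int64Array::from(vec![1, 2])),
            Arc::new(StringArray::from(vec![Some("a"), None])),
        ],
    )
    .unwrap();

    // `with_explicit_nulls` keeps `"name": null` instead of dropping the key,
    // so each emitted row has exactly one value per column.
    let mut writer = WriterBuilder::new()
        .with_explicit_nulls(true)
        .build::<_, JsonArray>(Vec::new());
    writer.write_batches(&[&batch]).unwrap();
    writer.finish().unwrap();

    let json = String::from_utf8(writer.into_inner()).unwrap();
    assert!(json.contains(r#""name":null"#));
}
```
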
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct Column {
+    pub name: String,
+    pub r#type: String,
+}
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
+#[schema(as = Row, value_type = Vec<Value>)]
+pub struct Row(Vec<Value>);
+
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
+#[serde(rename_all = "camelCase")]
+pub struct ResultSet {
+    pub columns: Vec<Column>,
+    pub rows: Vec<Row>,
+}
+
+#[derive(Debug, Snafu)]
+pub enum ResultSetError {
+    #[snafu(display("Failed to create result set: {source}"))]
+    CreateResultSet {
+        source: datafusion::arrow::error::ArrowError,
+    },
+    #[snafu(display("Failed to convert to utf8: {source}"))]
+    Utf8 { source: std::string::FromUtf8Error },
+    #[snafu(display("Failed to parse result: {source}"))]
+    ResultParse { source: serde_json::Error },
+}
+
+impl ResultSet {
+    pub fn query_result_to_result_set(
+        records: &[RecordBatch],
+        columns: &[ColumnInfo],
+    ) -> std::result::Result<Self, ResultSetError> {
+        let buf = Vec::new();
+        let write_builder = WriterBuilder::new().with_explicit_nulls(true);
+        let mut writer = write_builder.build::<_, JsonArray>(buf);
+
+        // serialize records to str
+        let records: Vec<&RecordBatch> = records.iter().collect();
+        writer
+            .write_batches(&records)
+            .context(CreateResultSetSnafu)?;
+        writer.finish().context(CreateResultSetSnafu)?;
+
+        // Get the underlying buffer back
+        let buf = writer.into_inner();
+        let record_batch_str = String::from_utf8(buf).context(Utf8Snafu)?;
+
+        // convert to an array, keeping only the values
+        let rows: Vec<IndexMap<String, Value>> =
+            serde_json::from_str(record_batch_str.as_str()).context(ResultParseSnafu)?;
+        let rows: Vec<Row> = rows
+            .into_iter()
+            .map(|obj| Row(obj.values().cloned().collect()))
+            .collect();
+
+        let columns = columns
+            .iter()
+            .map(|ci| Column {
+                name: ci.name.clone(),
+                r#type: ci.r#type.clone(),
+            })
+            .collect();
+
+        Ok(Self { columns, rows })
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::recording_service::RecordingExecutionService;
+    use crate::{GetQueries, SlateDBWorksheetsStore, Worksheet, WorksheetsStore};
+    use core_executor::service::CoreExecutionService;
+    use core_executor::utils::DataSerializationFormat;
+    use core_metastore::Metastore;
+    use core_metastore::SlateDBMetastore;
+    use core_metastore::{Database as MetastoreDatabase, Volume as MetastoreVolume};
+    use core_utils::Db;
+    use std::sync::Arc;
+
+    #[tokio::test]
+    #[allow(clippy::expect_used, clippy::too_many_lines)]
+    async fn test_recording_service() {
+        let db = Db::memory().await;
+        let metastore = Arc::new(SlateDBMetastore::new(db.clone()));
+        let history_store = Arc::new(SlateDBWorksheetsStore::new(db));
+        let execution_svc = Arc::new(CoreExecutionService::new(
+            metastore.clone(),
+            Config {
+                dbt_serialization_format: DataSerializationFormat::Json,
+            },
+        ));
+        let execution_svc =
+            RecordingExecutionService::new(execution_svc.clone(), history_store.clone());
+
+        metastore
+            .create_volume(
+                &"test_volume".to_string(),
+                MetastoreVolume::new(
+                    "test_volume".to_string(),
+                    core_metastore::VolumeType::Memory,
+                ),
+            )
+            .await
.expect("Failed to create volume"); + + let database_name = "embucket".to_string(); + + metastore + .create_database( + &database_name.clone(), + MetastoreDatabase { + ident: "embucket".to_string(), + properties: None, + volume: "test_volume".to_string(), + }, + ) + .await + .expect("Failed to create database"); + + let session_id = "test_session_id"; + execution_svc + .create_session(session_id.to_string()) + .await + .expect("Failed to create session"); + + let schema_name = "public".to_string(); + + let context = + QueryContext::new(Some(database_name.clone()), Some(schema_name.clone()), None); + + //Good query + execution_svc + .query( + session_id, + format!( + "CREATE SCHEMA {}.{}", + database_name.clone(), + schema_name.clone() + ) + .as_str(), + context.clone(), + ) + .await + .expect("Failed to add schema"); + + assert_eq!( + 1, + history_store + .get_queries(GetQueries::default()) + .await + .expect("Failed to get queries") + .len() + ); + + //Failing query + execution_svc + .query( + session_id, + format!( + "CREATE SCHEMA {}.{}", + database_name.clone(), + schema_name.clone() + ) + .as_str(), + context.clone(), + ) + .await + .expect_err("Failed to not add schema"); + + assert_eq!( + 2, + history_store + .get_queries(GetQueries::default()) + .await + .expect("Failed to get queries") + .len() + ); + + let table_name = "test1".to_string(); + + //Create table queries + execution_svc + .query( + session_id, + format!( + "create TABLE {}.{}.{} + external_volume = '' + catalog = '' + base_location = '' + ( + APP_ID TEXT, + PLATFORM TEXT, + EVENT TEXT, + TXN_ID NUMBER(38,0), + EVENT_TIME TEXT + );", + database_name.clone(), + schema_name.clone(), + table_name.clone() + ) + .as_str(), + context.clone(), + ) + .await + .expect("Failed to create table"); + + assert_eq!( + 3, + history_store + .get_queries(GetQueries::default()) + .await + .expect("Failed to get queries") + .len() + ); + + //Insert into query + execution_svc + .query( + session_id, + format!( + "INSERT INTO {}.{}.{} (APP_ID, PLATFORM, EVENT, TXN_ID, EVENT_TIME) + VALUES ('12345', 'iOS', 'login', '123456', '2021-01-01T00:00:00'), + ('67890', 'Android', 'purchase', '456789', '2021-01-01T00:02:00')", + database_name.clone(), + schema_name.clone(), + table_name.clone() + ) + .as_str(), + context.clone(), + ) + .await + .expect("Failed to insert into"); + + assert_eq!( + 4, + history_store + .get_queries(GetQueries::default()) + .await + .expect("Failed to get queries") + .len() + ); + + //With worksheet + let worksheet = history_store + .add_worksheet(Worksheet::new("Testing1".to_string(), String::new())) + .await + .expect("Failed to add worksheet"); + + assert_eq!( + 0, + history_store + .get_queries(GetQueries::default().with_worksheet_id(worksheet.clone().id)) + .await + .expect("Failed to get queries") + .len() + ); + + execution_svc + .query( + session_id, + format!( + "INSERT INTO {}.{}.{} (APP_ID, PLATFORM, EVENT, TXN_ID, EVENT_TIME) + VALUES ('1234', 'iOS', 'login', '123456', '2021-01-01T00:00:00'), + ('6789', 'Android', 'purchase', '456789', '2021-01-01T00:02:00')", + database_name.clone(), + schema_name.clone(), + table_name.clone() + ) + .as_str(), + QueryContext::new( + Some(database_name.clone()), + Some(schema_name.clone()), + Some(worksheet.clone().id), + ), + ) + .await + .expect("Failed to insert into"); + + assert_eq!( + 1, + history_store + .get_queries(GetQueries::default().with_worksheet_id(worksheet.clone().id)) + .await + .expect("Failed to get queries") + .len() + ); + } +} diff --git 
a/crates/history/src/store.rs b/crates/core-history/src/store.rs similarity index 96% rename from crates/history/src/store.rs rename to crates/core-history/src/store.rs index 891be3009..d28ee925b 100644 --- a/crates/history/src/store.rs +++ b/crates/core-history/src/store.rs @@ -1,4 +1,4 @@ -use embucket_utils::Db; +use core_utils::Db; use std::sync::Arc; pub struct SlateDBWorksheetsStore { diff --git a/crates/metastore/Cargo.toml b/crates/core-metastore/Cargo.toml similarity index 76% rename from crates/metastore/Cargo.toml rename to crates/core-metastore/Cargo.toml index 6fa204ac9..97742e89d 100644 --- a/crates/metastore/Cargo.toml +++ b/crates/core-metastore/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "embucket_metastore" +name = "core-metastore" version = "0.1.0" edition = "2021" license-file = { workspace = true } @@ -8,23 +8,23 @@ license-file = { workspace = true } async-trait = { workspace = true } bytes = { workspace = true } chrono = { workspace = true } -dashmap = "6.1.0" +dashmap = { workspace = true } futures = { workspace = true } iceberg-rust = { workspace = true } iceberg-rust-spec = { workspace = true } -embucket_utils = { path = "../utils" } +core-utils = { path = "../core-utils" } object_store = { workspace = true } serde = { workspace = true } serde_json = { workspace = true } slatedb = { workspace = true } snafu = { workspace = true } -strum = { version = "0.26.3", features = ["derive"] } +strum = { workspace = true } tokio = { workspace = true } tracing = { workspace = true } -url = "2.5.2" +url = { workspace = true } utoipa = { workspace = true } uuid = { workspace = true } -validator = { version = "0.20.0", features = ["derive"] } +validator = { workspace = true } [dev-dependencies] insta = { version = "1.42.0", features = ["yaml", "filters", "redactions"] } diff --git a/crates/core-metastore/graph.png b/crates/core-metastore/graph.png new file mode 100644 index 000000000..6f079ae44 Binary files /dev/null and b/crates/core-metastore/graph.png differ diff --git a/crates/metastore/src/error.rs b/crates/core-metastore/src/error.rs similarity index 98% rename from crates/metastore/src/error.rs rename to crates/core-metastore/src/error.rs index 03e324b17..c6944604f 100644 --- a/crates/metastore/src/error.rs +++ b/crates/core-metastore/src/error.rs @@ -37,7 +37,7 @@ pub enum MetastoreError { SlateDB { source: slatedb::SlateDBError }, #[snafu(display("SlateDB error: {source}"))] - UtilSlateDB { source: embucket_utils::Error }, + UtilSlateDB { source: core_utils::Error }, #[snafu(display("Metastore object of type {type_name} with name {name} already exists"))] ObjectAlreadyExists { type_name: String, name: String }, diff --git a/crates/metastore/src/lib.rs b/crates/core-metastore/src/lib.rs similarity index 100% rename from crates/metastore/src/lib.rs rename to crates/core-metastore/src/lib.rs diff --git a/crates/metastore/src/metastore.rs b/crates/core-metastore/src/metastore.rs similarity index 99% rename from crates/metastore/src/metastore.rs rename to crates/core-metastore/src/metastore.rs index 57f394803..8ce9677a9 100644 --- a/crates/metastore/src/metastore.rs +++ b/crates/core-metastore/src/metastore.rs @@ -6,13 +6,13 @@ use crate::models::*; use async_trait::async_trait; use bytes::Bytes; use chrono::Utc; +use core_utils::Db; +use core_utils::scan_iterator::{ScanIterator, VecScanIterator}; use dashmap::DashMap; -use embucket_utils::scan_iterator::{ScanIterator, VecScanIterator}; -use embucket_utils::Db; use futures::{StreamExt, TryStreamExt}; use 
iceberg_rust::catalog::commit::apply_table_updates; use iceberg_rust_spec::table_metadata::{FormatVersion, TableMetadataBuilder}; -use object_store::{path::Path, ObjectStore, PutPayload}; +use object_store::{ObjectStore, PutPayload, path::Path}; use serde::de::DeserializeOwned; use snafu::ResultExt; use uuid::Uuid; diff --git a/crates/metastore/src/models/database.rs b/crates/core-metastore/src/models/database.rs similarity index 100% rename from crates/metastore/src/models/database.rs rename to crates/core-metastore/src/models/database.rs diff --git a/crates/metastore/src/models/mod.rs b/crates/core-metastore/src/models/mod.rs similarity index 100% rename from crates/metastore/src/models/mod.rs rename to crates/core-metastore/src/models/mod.rs diff --git a/crates/metastore/src/models/schema.rs b/crates/core-metastore/src/models/schema.rs similarity index 100% rename from crates/metastore/src/models/schema.rs rename to crates/core-metastore/src/models/schema.rs diff --git a/crates/metastore/src/models/table.rs b/crates/core-metastore/src/models/table.rs similarity index 100% rename from crates/metastore/src/models/table.rs rename to crates/core-metastore/src/models/table.rs diff --git a/crates/metastore/src/models/volumes.rs b/crates/core-metastore/src/models/volumes.rs similarity index 99% rename from crates/metastore/src/models/volumes.rs rename to crates/core-metastore/src/models/volumes.rs index 59b0b4454..6f2071fcd 100644 --- a/crates/metastore/src/models/volumes.rs +++ b/crates/core-metastore/src/models/volumes.rs @@ -1,5 +1,5 @@ use crate::error::{self as metastore_error, MetastoreResult}; -use object_store::{aws::AmazonS3Builder, path::Path, ObjectStore}; +use object_store::{ObjectStore, aws::AmazonS3Builder, path::Path}; use serde::{Deserialize, Serialize}; use snafu::ResultExt; use std::sync::Arc; diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__create_database.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__create_database.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__create_database.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__create_database.snap diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__create_volumes.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__create_volumes.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__create_volumes.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__create_volumes.snap diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__delete_volume.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__delete_volume.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__delete_volume.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__delete_volume.snap diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__duplicate_volume.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__duplicate_volume.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__duplicate_volume.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__duplicate_volume.snap diff --git 
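Referring back to ResultSet::query_result_to_result_set in core-history above: the Arrow-to-JSON round trip it performs can be sketched standalone. A minimal version under this workspace's dependencies (datafusion's re-exported arrow, serde_json, and indexmap with its serde feature); the schema and batch contents are invented for illustration.

// Sketch of the RecordBatch -> rows-of-values conversion used by ResultSet.
use datafusion::arrow::array::{Int32Array, RecordBatch, StringArray};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::arrow::json::{WriterBuilder, writer::JsonArray};
use indexmap::IndexMap;
use serde_json::Value;
use std::sync::Arc;

fn batch_to_rows(batch: &RecordBatch) -> Result<Vec<Vec<Value>>, Box<dyn std::error::Error>> {
    // Serialize the batch as a JSON array of objects; explicit nulls keep one
    // entry per column in every row.
    let mut writer = WriterBuilder::new()
        .with_explicit_nulls(true)
        .build::<_, JsonArray>(Vec::new());
    writer.write_batches(&[batch])?;
    writer.finish()?;
    let buf = writer.into_inner();

    // IndexMap preserves column order, so dropping the keys leaves rows whose
    // positions line up with the schema.
    let rows: Vec<IndexMap<String, Value>> = serde_json::from_slice(&buf)?;
    Ok(rows
        .into_iter()
        .map(|obj| obj.into_values().collect())
        .collect())
}

fn main() -> Result<(), Box<dyn std::error::Error>> {
    let schema = Arc::new(Schema::new(vec![
        Field::new("id", DataType::Int32, false),
        Field::new("name", DataType::Utf8, true),
    ]));
    let batch = RecordBatch::try_new(
        schema,
        vec![
            Arc::new(Int32Array::from(vec![1, 2])),
            Arc::new(StringArray::from(vec![Some("a"), None])),
        ],
    )?;
    // Prints [[1,"a"],[2,null]]
    println!("{}", serde_json::to_string(&batch_to_rows(&batch)?)?);
    Ok(())
}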
a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__schemas.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__schemas.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__schemas.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__schemas.snap diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__tables.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__tables.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__tables.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__tables.snap diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__temporary_tables.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__temporary_tables.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__temporary_tables.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__temporary_tables.snap diff --git a/crates/metastore/src/snapshots/embucket_metastore__metastore__tests__update_volume.snap b/crates/core-metastore/src/snapshots/core_metastore__metastore__tests__update_volume.snap similarity index 100% rename from crates/metastore/src/snapshots/embucket_metastore__metastore__tests__update_volume.snap rename to crates/core-metastore/src/snapshots/core_metastore__metastore__tests__update_volume.snap diff --git a/crates/utils/Cargo.toml b/crates/core-utils/Cargo.toml similarity index 95% rename from crates/utils/Cargo.toml rename to crates/core-utils/Cargo.toml index 9b9500cbb..e3e04387b 100644 --- a/crates/utils/Cargo.toml +++ b/crates/core-utils/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "embucket_utils" +name = "core-utils" version = "0.1.0" edition = "2021" license-file = { workspace = true } diff --git a/crates/utils/src/iterable.rs b/crates/core-utils/src/iterable.rs similarity index 100% rename from crates/utils/src/iterable.rs rename to crates/core-utils/src/iterable.rs diff --git a/crates/utils/src/lib.rs b/crates/core-utils/src/lib.rs similarity index 99% rename from crates/utils/src/lib.rs rename to crates/core-utils/src/lib.rs index 9ba6f5136..ba05da350 100644 --- a/crates/utils/src/lib.rs +++ b/crates/core-utils/src/lib.rs @@ -5,7 +5,7 @@ use crate::scan_iterator::{ScanIterator, VecScanIterator}; use async_trait::async_trait; use bytes::Bytes; use iterable::IterableEntity; -use serde::{de::DeserializeOwned, Serialize}; +use serde::{Serialize, de::DeserializeOwned}; use serde_json::de; use serde_json::ser; use slatedb::Db as SlateDb; diff --git a/crates/utils/src/scan_iterator.rs b/crates/core-utils/src/scan_iterator.rs similarity index 100% rename from crates/utils/src/scan_iterator.rs rename to crates/core-utils/src/scan_iterator.rs diff --git a/crates/utils/src/snapshots/embucket_utils__test__db.snap b/crates/core-utils/src/snapshots/core_utils__test__db.snap similarity index 92% rename from crates/utils/src/snapshots/embucket_utils__test__db.snap rename to crates/core-utils/src/snapshots/core_utils__test__db.snap index 0c62e6419..00984ee57 100644 --- a/crates/utils/src/snapshots/embucket_utils__test__db.snap +++ b/crates/core-utils/src/snapshots/core_utils__test__db.snap @@ -1,5 +1,5 @@ --- -source: crates/utils/src/lib.rs +source: crates/core-utils/src/lib.rs expression: "(get_empty, 
get_after_put, get_after_delete, list_after_append,\nlist_after_remove)" --- ( diff --git a/crates/df-builtins/Cargo.toml b/crates/df-builtins/Cargo.toml new file mode 100644 index 000000000..d0b9a7909 --- /dev/null +++ b/crates/df-builtins/Cargo.toml @@ -0,0 +1,23 @@ +[package] +name = "df-builtins" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +chrono = { workspace = true } +datafusion = { workspace = true } +datafusion-common = { workspace = true } +datafusion-doc = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-macros = { workspace = true } +datafusion-physical-plan = { workspace = true } +paste = "1" +serde = { workspace = true } +serde_json = { workspace = true } + +[dev-dependencies] +tokio = { workspace = true } + +[lints] +workspace = true diff --git a/crates/runtime/src/execution/datafusion/functions/aggregate/any_value.rs b/crates/df-builtins/src/aggregate/any_value.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/aggregate/any_value.rs rename to crates/df-builtins/src/aggregate/any_value.rs diff --git a/crates/runtime/src/execution/datafusion/functions/aggregate/booland_agg.rs b/crates/df-builtins/src/aggregate/booland_agg.rs similarity index 98% rename from crates/runtime/src/execution/datafusion/functions/aggregate/booland_agg.rs rename to crates/df-builtins/src/aggregate/booland_agg.rs index 5c28e848e..d15b551fd 100644 --- a/crates/runtime/src/execution/datafusion/functions/aggregate/booland_agg.rs +++ b/crates/df-builtins/src/aggregate/booland_agg.rs @@ -1,5 +1,5 @@ -use crate::execution::datafusion::functions::aggregate::macros::make_udaf_function; -use crate::execution::datafusion::functions::array_to_boolean; +use crate::aggregate::macros::make_udaf_function; +use crate::array_to_boolean; use datafusion::arrow::array::ArrayRef; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; diff --git a/crates/runtime/src/execution/datafusion/functions/aggregate/boolor_agg.rs b/crates/df-builtins/src/aggregate/boolor_agg.rs similarity index 97% rename from crates/runtime/src/execution/datafusion/functions/aggregate/boolor_agg.rs rename to crates/df-builtins/src/aggregate/boolor_agg.rs index 6e09179de..95d76c3c1 100644 --- a/crates/runtime/src/execution/datafusion/functions/aggregate/boolor_agg.rs +++ b/crates/df-builtins/src/aggregate/boolor_agg.rs @@ -1,5 +1,5 @@ -use crate::execution::datafusion::functions::aggregate::macros::make_udaf_function; -use crate::execution::datafusion::functions::array_to_boolean; +use crate::aggregate::macros::make_udaf_function; +use crate::array_to_boolean; use datafusion::arrow::array::ArrayRef; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; diff --git a/crates/runtime/src/execution/datafusion/functions/aggregate/boolxor_agg.rs b/crates/df-builtins/src/aggregate/boolxor_agg.rs similarity index 97% rename from crates/runtime/src/execution/datafusion/functions/aggregate/boolxor_agg.rs rename to crates/df-builtins/src/aggregate/boolxor_agg.rs index b9a147a50..9ee6d3f04 100644 --- a/crates/runtime/src/execution/datafusion/functions/aggregate/boolxor_agg.rs +++ b/crates/df-builtins/src/aggregate/boolxor_agg.rs @@ -1,5 +1,5 @@ -use crate::execution::datafusion::functions::aggregate::macros::make_udaf_function; -use crate::execution::datafusion::functions::array_to_boolean; +use crate::aggregate::macros::make_udaf_function; +use crate::array_to_boolean; use 
datafusion::arrow::array::ArrayRef; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; diff --git a/crates/runtime/src/execution/datafusion/functions/aggregate/mod.rs b/crates/df-builtins/src/aggregate/mod.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/aggregate/mod.rs rename to crates/df-builtins/src/aggregate/mod.rs index 9513b3070..ef3907d94 100644 --- a/crates/runtime/src/execution/datafusion/functions/aggregate/mod.rs +++ b/crates/df-builtins/src/aggregate/mod.rs @@ -15,8 +15,8 @@ // specific language governing permissions and limitations // under the License. -use datafusion_expr::registry::FunctionRegistry; use datafusion_expr::AggregateUDF; +use datafusion_expr::registry::FunctionRegistry; use std::sync::Arc; pub mod any_value; diff --git a/crates/runtime/src/execution/datafusion/functions/aggregate/percentile_cont.rs b/crates/df-builtins/src/aggregate/percentile_cont.rs similarity index 96% rename from crates/runtime/src/execution/datafusion/functions/aggregate/percentile_cont.rs rename to crates/df-builtins/src/aggregate/percentile_cont.rs index 8d30c9668..ff32a31ab 100644 --- a/crates/runtime/src/execution/datafusion/functions/aggregate/percentile_cont.rs +++ b/crates/df-builtins/src/aggregate/percentile_cont.rs @@ -8,8 +8,8 @@ use std::sync::Arc; use datafusion::arrow::array::Array; use datafusion::arrow::array::{ArrayRef, RecordBatch}; use datafusion::arrow::datatypes::{DataType, Field, Schema}; +use datafusion_common::{Result, ScalarValue, plan_err}; use datafusion_common::{internal_err, not_impl_datafusion_err, not_impl_err}; -use datafusion_common::{plan_err, Result, ScalarValue}; use datafusion_expr::function::{AccumulatorArgs, StateFieldsArgs}; use datafusion_expr::type_coercion::aggregates::{INTEGERS, NUMERICS}; use datafusion_expr::utils::format_state_name; @@ -71,19 +71,18 @@ fn get_scalar_value(expr: &Arc) -> Result { } fn validate_input_percentile_expr(expr: &Arc) -> Result { - let percentile = match get_scalar_value(expr) - .map_err(|_| not_impl_datafusion_err!("Percentile value for 'PERCENTILE_CONT' must be a literal, got: {expr}"))? { - ScalarValue::Float32(Some(value)) => { - f64::from(value) - } - ScalarValue::Float64(Some(value)) => { - value - } + let percentile = match get_scalar_value(expr).map_err(|_| { + not_impl_datafusion_err!( + "Percentile value for 'PERCENTILE_CONT' must be a literal, got: {expr}" + ) + })? 
{ + ScalarValue::Float32(Some(value)) => f64::from(value), + ScalarValue::Float64(Some(value)) => value, sv => { return not_impl_err!( "Percentile value for 'PERCENTILE_CONT' must be Float32 or Float64 literal (got data type {})", sv.data_type() - ) + ); } }; diff --git a/crates/runtime/src/execution/datafusion/functions/booland.rs b/crates/df-builtins/src/booland.rs similarity index 76% rename from crates/runtime/src/execution/datafusion/functions/booland.rs rename to crates/df-builtins/src/booland.rs index 1e368dad8..84c2497e5 100644 --- a/crates/runtime/src/execution/datafusion/functions/booland.rs +++ b/crates/df-builtins/src/booland.rs @@ -1,4 +1,4 @@ -use crate::execution::datafusion::functions::array_to_boolean; +use crate::array_to_boolean; use datafusion::arrow::array::builder::BooleanBuilder; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; @@ -102,15 +102,15 @@ mod tests { let result = ctx.sql(q).await?.collect().await?; assert_batches_eq!( - &[ -"+-----------------------------+---------------------------------+----------------------------+------------------------+------------------------+--------------------+", -"| booland(Int64(1),Int64(-2)) | booland(Int64(0),Float64(2.35)) | booland(Int64(0),Int64(0)) | booland(Int64(0),NULL) | booland(NULL,Int64(3)) | booland(NULL,NULL) |", -"+-----------------------------+---------------------------------+----------------------------+------------------------+------------------------+--------------------+", -"| true | false | false | false | | |", -"+-----------------------------+---------------------------------+----------------------------+------------------------+------------------------+--------------------+", - ], - &result - ); + &[ + "+-----------------------------+---------------------------------+----------------------------+------------------------+------------------------+--------------------+", + "| booland(Int64(1),Int64(-2)) | booland(Int64(0),Float64(2.35)) | booland(Int64(0),Int64(0)) | booland(Int64(0),NULL) | booland(NULL,Int64(3)) | booland(NULL,NULL) |", + "+-----------------------------+---------------------------------+----------------------------+------------------------+------------------------+--------------------+", + "| true | false | false | false | | |", + "+-----------------------------+---------------------------------+----------------------------+------------------------+------------------------+--------------------+", + ], + &result + ); Ok(()) } diff --git a/crates/runtime/src/execution/datafusion/functions/boolor.rs b/crates/df-builtins/src/boolor.rs similarity index 75% rename from crates/runtime/src/execution/datafusion/functions/boolor.rs rename to crates/df-builtins/src/boolor.rs index e5b731328..abcf4a24a 100644 --- a/crates/runtime/src/execution/datafusion/functions/boolor.rs +++ b/crates/df-builtins/src/boolor.rs @@ -1,5 +1,5 @@ -use crate::execution::datafusion::functions::array_to_boolean; -use crate::execution::datafusion::functions::booland::is_true; +use crate::array_to_boolean; +use crate::booland::is_true; use datafusion::arrow::array::builder::BooleanBuilder; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; @@ -99,15 +99,15 @@ mod tests { let result = ctx.sql(q).await?.collect().await?; assert_batches_eq!( - &[ -"+---------------------------+---------------------------------+-----------------------+---------------------------+-----------------------+-------------------+", -"| boolor(Int64(1),Int64(2)) | 
boolor(Float64(-1.35),Int64(0)) | boolor(Int64(3),NULL) | boolor(Int64(0),Int64(0)) | boolor(NULL,Int64(0)) | boolor(NULL,NULL) |", -"+---------------------------+---------------------------------+-----------------------+---------------------------+-----------------------+-------------------+", -"| true | true | true | false | | |", -"+---------------------------+---------------------------------+-----------------------+---------------------------+-----------------------+-------------------+", - ], - &result - ); + &[ + "+---------------------------+---------------------------------+-----------------------+---------------------------+-----------------------+-------------------+", + "| boolor(Int64(1),Int64(2)) | boolor(Float64(-1.35),Int64(0)) | boolor(Int64(3),NULL) | boolor(Int64(0),Int64(0)) | boolor(NULL,Int64(0)) | boolor(NULL,NULL) |", + "+---------------------------+---------------------------------+-----------------------+---------------------------+-----------------------+-------------------+", + "| true | true | true | false | | |", + "+---------------------------+---------------------------------+-----------------------+---------------------------+-----------------------+-------------------+", + ], + &result + ); Ok(()) } } diff --git a/crates/runtime/src/execution/datafusion/functions/boolxor.rs b/crates/df-builtins/src/boolxor.rs similarity index 76% rename from crates/runtime/src/execution/datafusion/functions/boolxor.rs rename to crates/df-builtins/src/boolxor.rs index a41cb0dfc..d7fbb78d1 100644 --- a/crates/runtime/src/execution/datafusion/functions/boolxor.rs +++ b/crates/df-builtins/src/boolxor.rs @@ -1,5 +1,5 @@ -use crate::execution::datafusion::functions::array_to_boolean; -use crate::execution::datafusion::functions::booland::is_true; +use crate::array_to_boolean; +use crate::booland::is_true; use datafusion::arrow::array::builder::BooleanBuilder; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; @@ -106,15 +106,15 @@ mod tests { let result = ctx.sql(q).await?.collect().await?; assert_batches_eq!( - &[ -"+----------------------------+-----------------------------+----------------------------+------------------------+------------------------+--------------------+", -"| boolxor(Int64(2),Int64(0)) | boolxor(Int64(1),Int64(-1)) | boolxor(Int64(0),Int64(0)) | boolxor(NULL,Int64(3)) | boolxor(NULL,Int64(0)) | boolxor(NULL,NULL) |", -"+----------------------------+-----------------------------+----------------------------+------------------------+------------------------+--------------------+", -"| true | false | false | | | |", -"+----------------------------+-----------------------------+----------------------------+------------------------+------------------------+--------------------+", - ], - &result - ); + &[ + "+----------------------------+-----------------------------+----------------------------+------------------------+------------------------+--------------------+", + "| boolxor(Int64(2),Int64(0)) | boolxor(Int64(1),Int64(-1)) | boolxor(Int64(0),Int64(0)) | boolxor(NULL,Int64(3)) | boolxor(NULL,Int64(0)) | boolxor(NULL,NULL) |", + "+----------------------------+-----------------------------+----------------------------+------------------------+------------------------+--------------------+", + "| true | false | false | | | |", + "+----------------------------+-----------------------------+----------------------------+------------------------+------------------------+--------------------+", + ], + &result + ); Ok(()) } } diff 
--git a/crates/runtime/src/execution/datafusion/functions/convert_timezone.rs b/crates/df-builtins/src/convert_timezone.rs similarity index 99% rename from crates/runtime/src/execution/datafusion/functions/convert_timezone.rs rename to crates/df-builtins/src/convert_timezone.rs index 721fc27cc..f8d666c18 100644 --- a/crates/runtime/src/execution/datafusion/functions/convert_timezone.rs +++ b/crates/df-builtins/src/convert_timezone.rs @@ -2,10 +2,10 @@ use datafusion::arrow::array::timezone::Tz; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::datatypes::DataType::{Timestamp, Utf8}; use datafusion::arrow::datatypes::TimeUnit::{self, Microsecond, Millisecond, Nanosecond, Second}; -use datafusion::common::{internal_err, plan_err, Result}; +use datafusion::common::{Result, internal_err, plan_err}; use datafusion::logical_expr::TypeSignature::Exact; use datafusion::logical_expr::{ - ColumnarValue, ScalarUDFImpl, Signature, Volatility, TIMEZONE_WILDCARD, + ColumnarValue, ScalarUDFImpl, Signature, TIMEZONE_WILDCARD, Volatility, }; use datafusion::scalar::ScalarValue; use datafusion_expr::{ReturnInfo, ReturnTypeArgs}; diff --git a/crates/runtime/src/execution/datafusion/functions/date_add.rs b/crates/df-builtins/src/date_add.rs similarity index 95% rename from crates/runtime/src/execution/datafusion/functions/date_add.rs rename to crates/df-builtins/src/date_add.rs index e04d98e00..0262b0346 100644 --- a/crates/runtime/src/execution/datafusion/functions/date_add.rs +++ b/crates/df-builtins/src/date_add.rs @@ -2,12 +2,12 @@ use datafusion::arrow::array::{Array, ArrayRef}; use datafusion::arrow::compute::kernels::numeric::add_wrapping; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::datatypes::TimeUnit::Nanosecond; -use datafusion::common::{plan_err, Result}; +use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::TypeSignature::Coercible; use datafusion::logical_expr::TypeSignatureClass; use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use datafusion::scalar::ScalarValue; -use datafusion_common::types::{logical_date, logical_int64, logical_string, NativeType}; +use datafusion_common::types::{NativeType, logical_date, logical_int64, logical_string}; use datafusion_expr::Coercion; use std::any::Any; use std::sync::Arc; @@ -226,8 +226,8 @@ mod tests { let fn_args = ScalarFunctionArgs { args, number_rows: 0, - return_type: &arrow_schema::DataType::Timestamp( - arrow_schema::TimeUnit::Microsecond, + return_type: &datafusion::arrow::datatypes::DataType::Timestamp( + datafusion::arrow::datatypes::TimeUnit::Microsecond, Some(Arc::from(String::from("+00").into_boxed_str())), ), }; @@ -259,8 +259,8 @@ mod tests { let fn_args = ScalarFunctionArgs { args, number_rows: 0, - return_type: &arrow_schema::DataType::Timestamp( - arrow_schema::TimeUnit::Microsecond, + return_type: &datafusion::arrow::datatypes::DataType::Timestamp( + datafusion::arrow::datatypes::TimeUnit::Microsecond, Some(Arc::from(String::from("+00").into_boxed_str())), ), }; @@ -294,8 +294,8 @@ mod tests { let fn_args = ScalarFunctionArgs { args, number_rows: 0, - return_type: &arrow_schema::DataType::Timestamp( - arrow_schema::TimeUnit::Microsecond, + return_type: &datafusion::arrow::datatypes::DataType::Timestamp( + datafusion::arrow::datatypes::TimeUnit::Microsecond, Some(Arc::from(String::from("+00").into_boxed_str())), ), }; diff --git a/crates/runtime/src/execution/datafusion/functions/date_diff.rs b/crates/df-builtins/src/date_diff.rs similarity 
index 98% rename from crates/runtime/src/execution/datafusion/functions/date_diff.rs rename to crates/df-builtins/src/date_diff.rs index f2a43fe97..35602f88f 100644 --- a/crates/runtime/src/execution/datafusion/functions/date_diff.rs +++ b/crates/df-builtins/src/date_diff.rs @@ -1,8 +1,8 @@ use datafusion::arrow::array::Array; -use datafusion::arrow::compute::{date_part, DatePart}; +use datafusion::arrow::compute::{DatePart, date_part}; use datafusion::arrow::datatypes::DataType; use datafusion::arrow::datatypes::DataType::Int64; -use datafusion::common::{plan_err, Result}; +use datafusion::common::{Result, plan_err}; use datafusion::logical_expr::TypeSignature::Coercible; use datafusion::logical_expr::TypeSignatureClass; use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; diff --git a/crates/runtime/src/execution/datafusion/functions/date_from_parts.rs b/crates/df-builtins/src/date_from_parts.rs similarity index 94% rename from crates/runtime/src/execution/datafusion/functions/date_from_parts.rs rename to crates/df-builtins/src/date_from_parts.rs index a9d6f2e0b..521966006 100644 --- a/crates/runtime/src/execution/datafusion/functions/date_from_parts.rs +++ b/crates/df-builtins/src/date_from_parts.rs @@ -1,9 +1,7 @@ use std::any::Any; use std::sync::Arc; -use crate::execution::datafusion::functions::timestamp_from_parts::{ - make_date, take_function_args, to_primitive_array, -}; +use crate::timestamp_from_parts::{make_date, take_function_args, to_primitive_array}; use datafusion::arrow::array::builder::PrimitiveBuilder; use datafusion::arrow::array::{Array, PrimitiveArray}; use datafusion::arrow::datatypes::DataType::{ @@ -139,10 +137,8 @@ super::macros::make_udf_function!(DateFromPartsFunc); #[cfg(test)] mod test { - use crate::execution::datafusion::functions::date_from_parts::DateFromPartsFunc; - use crate::execution::datafusion::functions::timestamp_from_parts::{ - to_primitive_array, UNIX_DAYS_FROM_CE, - }; + use crate::date_from_parts::DateFromPartsFunc; + use crate::timestamp_from_parts::{UNIX_DAYS_FROM_CE, to_primitive_array}; use chrono::NaiveDate; use datafusion::arrow::datatypes::Date32Type; use datafusion_common::ScalarValue; @@ -188,7 +184,7 @@ mod test { .invoke_with_args(datafusion_expr::ScalarFunctionArgs { args: fn_args, number_rows: 1, - return_type: &arrow::datatypes::DataType::Date32, + return_type: &datafusion::arrow::datatypes::DataType::Date32, }) .unwrap(); let result = to_primitive_array::(&result).unwrap(); diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/accessors/dim.rs b/crates/df-builtins/src/geospatial/accessors/dim.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/accessors/dim.rs rename to crates/df-builtins/src/geospatial/accessors/dim.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/accessors/geometry.rs b/crates/df-builtins/src/geospatial/accessors/geometry.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/accessors/geometry.rs rename to crates/df-builtins/src/geospatial/accessors/geometry.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/accessors/line_string.rs b/crates/df-builtins/src/geospatial/accessors/line_string.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/accessors/line_string.rs rename to crates/df-builtins/src/geospatial/accessors/line_string.rs diff --git 
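A recurring mechanical change in these hunks replaces arrow_schema::/arrow:: paths with datafusion::arrow::datatypes paths. The point is version coherence: DataFusion re-exports the exact arrow version it was built against, so df-builtins no longer needs its own arrow/arrow_schema dependency that could drift out of sync. A trivial before/after:

// Before: a direct arrow_schema dependency that can drift from DataFusion's
// pinned arrow version.
// use arrow_schema::{DataType, TimeUnit};

// After: the same types via DataFusion's re-export, always version-matched.
use datafusion::arrow::datatypes::{DataType, TimeUnit};

fn return_type() -> DataType {
    // Identical value either way; only the import path changes.
    DataType::Timestamp(TimeUnit::Microsecond, Some("+00".into()))
}

fn main() {
    println!("{return_type:?}", return_type = return_type());
}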
a/crates/runtime/src/execution/datafusion/functions/geospatial/accessors/mod.rs b/crates/df-builtins/src/geospatial/accessors/mod.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/accessors/mod.rs rename to crates/df-builtins/src/geospatial/accessors/mod.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/accessors/point.rs b/crates/df-builtins/src/geospatial/accessors/point.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/accessors/point.rs rename to crates/df-builtins/src/geospatial/accessors/point.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/accessors/srid.rs b/crates/df-builtins/src/geospatial/accessors/srid.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/accessors/srid.rs rename to crates/df-builtins/src/geospatial/accessors/srid.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/constructors/line_string.rs b/crates/df-builtins/src/geospatial/constructors/line_string.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/constructors/line_string.rs rename to crates/df-builtins/src/geospatial/constructors/line_string.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/constructors/mod.rs b/crates/df-builtins/src/geospatial/constructors/mod.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/constructors/mod.rs rename to crates/df-builtins/src/geospatial/constructors/mod.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/constructors/polygon.rs b/crates/df-builtins/src/geospatial/constructors/polygon.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/constructors/polygon.rs rename to crates/df-builtins/src/geospatial/constructors/polygon.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/data_types.rs b/crates/df-builtins/src/geospatial/data_types.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/data_types.rs rename to crates/df-builtins/src/geospatial/data_types.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/error.rs b/crates/df-builtins/src/geospatial/error.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/error.rs rename to crates/df-builtins/src/geospatial/error.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/measurement/area.rs b/crates/df-builtins/src/geospatial/measurement/area.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/measurement/area.rs rename to crates/df-builtins/src/geospatial/measurement/area.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/measurement/contains.rs b/crates/df-builtins/src/geospatial/measurement/contains.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/measurement/contains.rs rename to crates/df-builtins/src/geospatial/measurement/contains.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/measurement/distance.rs b/crates/df-builtins/src/geospatial/measurement/distance.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/measurement/distance.rs rename to 
crates/df-builtins/src/geospatial/measurement/distance.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/measurement/mod.rs b/crates/df-builtins/src/geospatial/measurement/mod.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/measurement/mod.rs rename to crates/df-builtins/src/geospatial/measurement/mod.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/measurement/within.rs b/crates/df-builtins/src/geospatial/measurement/within.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/measurement/within.rs rename to crates/df-builtins/src/geospatial/measurement/within.rs diff --git a/crates/runtime/src/execution/datafusion/functions/geospatial/mod.rs b/crates/df-builtins/src/geospatial/mod.rs similarity index 100% rename from crates/runtime/src/execution/datafusion/functions/geospatial/mod.rs rename to crates/df-builtins/src/geospatial/mod.rs diff --git a/crates/runtime/src/execution/datafusion/functions/iff.rs b/crates/df-builtins/src/iff.rs similarity index 96% rename from crates/runtime/src/execution/datafusion/functions/iff.rs rename to crates/df-builtins/src/iff.rs index 347eb3c3e..0ff6fd04e 100644 --- a/crates/runtime/src/execution/datafusion/functions/iff.rs +++ b/crates/df-builtins/src/iff.rs @@ -1,7 +1,7 @@ -use crate::execution::datafusion::functions::array_to_boolean; -use arrow_schema::DataType; +use crate::array_to_boolean; +use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; -use datafusion_common::{exec_err, ScalarValue}; +use datafusion_common::{ScalarValue, exec_err}; use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDFImpl, Signature, Volatility}; use std::any::Any; @@ -67,14 +67,14 @@ impl ScalarUDFImpl for IffFunc { let lhs = match &args.args[1] { ColumnarValue::Scalar(val) => val.to_owned(), ColumnarValue::Array(_) => { - return exec_err!("Iff function requires the second argument to be a scalar") + return exec_err!("Iff function requires the second argument to be a scalar"); } }; let rhs = match &args.args[2] { ColumnarValue::Scalar(val) => val.to_owned(), ColumnarValue::Array(_) => { - return exec_err!("Iff function requires the third argument to be a scalar") + return exec_err!("Iff function requires the third argument to be a scalar"); } }; diff --git a/crates/runtime/src/execution/datafusion/functions/mod.rs b/crates/df-builtins/src/lib.rs similarity index 94% rename from crates/runtime/src/execution/datafusion/functions/mod.rs rename to crates/df-builtins/src/lib.rs index 4582d8b42..ade09d69a 100644 --- a/crates/runtime/src/execution/datafusion/functions/mod.rs +++ b/crates/df-builtins/src/lib.rs @@ -1,9 +1,10 @@ -use crate::execution::datafusion::functions::to_boolean::ToBooleanFunc; -use crate::execution::datafusion::functions::to_time::ToTimeFunc; +pub use crate::aggregate::register_udafs; +use crate::to_boolean::ToBooleanFunc; +use crate::to_time::ToTimeFunc; use datafusion::arrow::array::{ Array, ArrayRef, ArrowNativeTypeOp, BooleanArray, Decimal128Array, Decimal256Array, - Float16Array, Float32Array, Float64Array, Int16Array, Int32Array, Int64Array, Int8Array, - StringViewArray, UInt16Array, UInt32Array, UInt64Array, UInt8Array, + Float16Array, Float32Array, Float64Array, Int8Array, Int16Array, Int32Array, Int64Array, + StringViewArray, UInt8Array, UInt16Array, UInt32Array, UInt64Array, }; use datafusion::arrow::datatypes::DataType; use datafusion::{common::Result, 
execution::FunctionRegistry, logical_expr::ScalarUDF}; @@ -142,7 +143,7 @@ pub(crate) fn array_to_boolean(arr: &ArrayRef) -> Result { return Err(DataFusionError::Internal(format!( "unsupported {:?} type. Only supports boolean, numeric, decimal, float types", arr.data_type() - ))) + ))); } }) } diff --git a/crates/runtime/src/execution/datafusion/functions/parse_json.rs b/crates/df-builtins/src/parse_json.rs similarity index 99% rename from crates/runtime/src/execution/datafusion/functions/parse_json.rs rename to crates/df-builtins/src/parse_json.rs index f0661277f..b41364bac 100644 --- a/crates/runtime/src/execution/datafusion/functions/parse_json.rs +++ b/crates/df-builtins/src/parse_json.rs @@ -2,7 +2,7 @@ use datafusion::arrow::array::{ Array, ArrayRef, BooleanArray, Float64Array, Int64Array, StringArray, StructArray, }; use datafusion::arrow::datatypes::{DataType, Field, Fields}; -use datafusion::common::{exec_err, Result}; +use datafusion::common::{Result, exec_err}; use datafusion::logical_expr::{ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use datafusion::scalar::ScalarValue; use serde_json::Value; diff --git a/crates/runtime/src/execution/datafusion/functions/table/flatten.rs b/crates/df-builtins/src/table/flatten.rs similarity index 97% rename from crates/runtime/src/execution/datafusion/functions/table/flatten.rs rename to crates/df-builtins/src/table/flatten.rs index 4cefd80e5..77d463ce4 100644 --- a/crates/runtime/src/execution/datafusion/functions/table/flatten.rs +++ b/crates/df-builtins/src/table/flatten.rs @@ -1,20 +1,3 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
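The flatten.rs move keeps FLATTEN a DataFusion table function, presumably registered through SessionContext::register_udtf as table/mod.rs suggests. For readers unfamiliar with that extension point, here is a toy table function using the same public API the hunk imports (TableFunctionImpl, TableProvider, MemTable); the function name and constant table are invented, and FlattenTableFunc itself is far more involved.

use std::sync::Arc;

use datafusion::arrow::array::{Int64Array, RecordBatch};
use datafusion::arrow::datatypes::{DataType, Field, Schema};
use datafusion::catalog::{TableFunctionImpl, TableProvider};
use datafusion::datasource::MemTable;
use datafusion::error::Result;
use datafusion::prelude::SessionContext;
use datafusion_expr::Expr;

/// Toy table function: ignores its arguments and returns a one-row table.
#[derive(Debug)]
struct OnesFunc;

impl TableFunctionImpl for OnesFunc {
    fn call(&self, _args: &[Expr]) -> Result<Arc<dyn TableProvider>> {
        let schema = Arc::new(Schema::new(vec![Field::new("one", DataType::Int64, false)]));
        let batch =
            RecordBatch::try_new(schema.clone(), vec![Arc::new(Int64Array::from(vec![1]))])?;
        Ok(Arc::new(MemTable::try_new(schema, vec![vec![batch]])?))
    }
}

#[tokio::main]
async fn main() -> Result<()> {
    let ctx = SessionContext::new();
    // Register the function, then call it in FROM position like FLATTEN.
    ctx.register_udtf("ones", Arc::new(OnesFunc));
    ctx.sql("SELECT * FROM ones(1)").await?.show().await?;
    Ok(())
}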
- use datafusion::arrow::array::builder::{StringBuilder, UInt64Builder}; use datafusion::arrow::array::{ArrayRef, RecordBatch, StringArray, UInt64Array}; use datafusion::arrow::datatypes::{DataType, Field, Schema, SchemaRef}; @@ -22,14 +5,14 @@ use datafusion::catalog::{TableFunctionImpl, TableProvider}; use datafusion::datasource::MemTable; use datafusion::physical_expr::create_physical_expr; use datafusion::physical_plan::ColumnarValue; -use datafusion_common::{exec_err, DFSchema, DataFusionError, Result as DFResult, ScalarValue}; -use datafusion_expr::execution_props::ExecutionProps; +use datafusion_common::{DFSchema, DataFusionError, Result as DFResult, ScalarValue, exec_err}; use datafusion_expr::Expr; +use datafusion_expr::execution_props::ExecutionProps; use serde_json::Value; use std::cell::RefCell; use std::rc::Rc; -use std::sync::atomic::{AtomicU64, Ordering}; use std::sync::Arc; +use std::sync::atomic::{AtomicU64, Ordering}; #[derive(Debug)] enum Mode { @@ -563,7 +546,7 @@ fn get_args(args: &[&Expr]) -> DFResult { #[cfg(test)] mod tests { use super::*; - use crate::execution::datafusion::functions::parse_json::ParseJsonFunc; + use crate::parse_json::ParseJsonFunc; use datafusion::prelude::SessionContext; use datafusion_common::assert_batches_eq; use std::sync::Arc; diff --git a/crates/runtime/src/execution/datafusion/functions/table/mod.rs b/crates/df-builtins/src/table/mod.rs similarity index 71% rename from crates/runtime/src/execution/datafusion/functions/table/mod.rs rename to crates/df-builtins/src/table/mod.rs index 66404f3fe..e6a62af12 100644 --- a/crates/runtime/src/execution/datafusion/functions/table/mod.rs +++ b/crates/df-builtins/src/table/mod.rs @@ -1,4 +1,4 @@ -use crate::execution::datafusion::functions::table::flatten::FlattenTableFunc; +use crate::table::flatten::FlattenTableFunc; use datafusion::prelude::SessionContext; use std::sync::Arc; diff --git a/crates/runtime/src/execution/datafusion/functions/time_from_parts.rs b/crates/df-builtins/src/time_from_parts.rs similarity index 94% rename from crates/runtime/src/execution/datafusion/functions/time_from_parts.rs rename to crates/df-builtins/src/time_from_parts.rs index 051fcb269..bdb64bebb 100644 --- a/crates/runtime/src/execution/datafusion/functions/time_from_parts.rs +++ b/crates/df-builtins/src/time_from_parts.rs @@ -1,9 +1,7 @@ use std::any::Any; use std::sync::Arc; -use crate::execution::datafusion::functions::timestamp_from_parts::{ - make_time, take_function_args, to_primitive_array, -}; +use crate::timestamp_from_parts::{make_time, take_function_args, to_primitive_array}; use datafusion::arrow::array::builder::PrimitiveBuilder; use datafusion::arrow::array::{Array, PrimitiveArray}; use datafusion::arrow::datatypes::DataType::{Int64, Time64}; @@ -12,7 +10,7 @@ use datafusion::arrow::datatypes::{DataType, Int64Type, Time64NanosecondType}; use datafusion::logical_expr::TypeSignature::Coercible; use datafusion::logical_expr::TypeSignatureClass; use datafusion_common::types::logical_int64; -use datafusion_common::{internal_err, Result, ScalarValue}; +use datafusion_common::{Result, ScalarValue, internal_err}; use datafusion_expr::{Coercion, ColumnarValue, ScalarUDFImpl, Signature, Volatility}; use datafusion_macros::user_doc; @@ -165,8 +163,8 @@ super::macros::make_udf_function!(TimeFromPartsFunc); #[cfg(test)] mod test { - use crate::execution::datafusion::functions::time_from_parts::TimeFromPartsFunc; - use crate::execution::datafusion::functions::timestamp_from_parts::to_primitive_array; + use 
crate::time_from_parts::TimeFromPartsFunc; + use crate::timestamp_from_parts::to_primitive_array; use chrono::NaiveTime; use datafusion::arrow::datatypes::Time64NanosecondType; use datafusion::logical_expr::ColumnarValue; @@ -215,8 +213,8 @@ mod test { .invoke_with_args(datafusion_expr::ScalarFunctionArgs { args: fn_args, number_rows: 1, - return_type: &arrow::datatypes::DataType::Time64( - arrow_schema::TimeUnit::Nanosecond, + return_type: &datafusion::arrow::datatypes::DataType::Time64( + datafusion::arrow::datatypes::TimeUnit::Nanosecond, ), }) .unwrap(); diff --git a/crates/runtime/src/execution/datafusion/functions/timestamp_from_parts.rs b/crates/df-builtins/src/timestamp_from_parts.rs similarity index 96% rename from crates/runtime/src/execution/datafusion/functions/timestamp_from_parts.rs rename to crates/df-builtins/src/timestamp_from_parts.rs index e73de7f9d..b89a8f3eb 100644 --- a/crates/runtime/src/execution/datafusion/functions/timestamp_from_parts.rs +++ b/crates/df-builtins/src/timestamp_from_parts.rs @@ -14,7 +14,7 @@ use datafusion::arrow::datatypes::{ use datafusion::logical_expr::TypeSignature::Coercible; use datafusion::logical_expr::TypeSignatureClass; use datafusion_common::types::{logical_date, logical_int64, logical_string}; -use datafusion_common::{exec_err, internal_err, Result, ScalarValue, _exec_datafusion_err}; +use datafusion_common::{_exec_datafusion_err, Result, ScalarValue, exec_err, internal_err}; use datafusion_expr::{ Coercion, ColumnarValue, ReturnInfo, ReturnTypeArgs, ScalarUDFImpl, Signature, Volatility, }; @@ -228,8 +228,16 @@ fn timestamps_from_components( ) -> Result> { let (years, months, days, hours, minutes, seconds, nanoseconds, time_zone) = match args.len() { 8 => { - let [years, months, days, hours, minutes, seconds, nanoseconds, time_zone] = - take_function_args("timestamp_from_parts", args)?; + let [ + years, + months, + days, + hours, + minutes, + seconds, + nanoseconds, + time_zone, + ] = take_function_args("timestamp_from_parts", args)?; ( years, months, @@ -388,7 +396,7 @@ pub fn take_function_args( pub fn to_primitive_array(col: &ColumnarValue) -> Result> where - T: arrow::datatypes::ArrowPrimitiveType, + T: datafusion::arrow::datatypes::ArrowPrimitiveType, { match col { ColumnarValue::Array(array) => Ok(array.as_primitive::().to_owned()), @@ -421,9 +429,7 @@ super::macros::make_udf_function!(TimestampFromPartsFunc); #[cfg(test)] mod test { - use crate::execution::datafusion::functions::timestamp_from_parts::{ - to_primitive_array, TimestampFromPartsFunc, - }; + use crate::timestamp_from_parts::{TimestampFromPartsFunc, to_primitive_array}; use chrono::DateTime; use datafusion::arrow::datatypes::TimestampNanosecondType; use datafusion::logical_expr::ColumnarValue; @@ -558,8 +564,8 @@ mod test { .invoke_with_args(datafusion_expr::ScalarFunctionArgs { args: fn_args, number_rows: 1, - return_type: &arrow::datatypes::DataType::Timestamp( - arrow_schema::TimeUnit::Nanosecond, + return_type: &datafusion::arrow::datatypes::DataType::Timestamp( + datafusion::arrow::datatypes::TimeUnit::Nanosecond, None, ), }) @@ -593,8 +599,8 @@ mod test { .invoke_with_args(datafusion_expr::ScalarFunctionArgs { args: fn_args, number_rows: 1, - return_type: &arrow::datatypes::DataType::Timestamp( - arrow_schema::TimeUnit::Nanosecond, + return_type: &datafusion::arrow::datatypes::DataType::Timestamp( + datafusion::arrow::datatypes::TimeUnit::Nanosecond, None, ), }) diff --git a/crates/runtime/src/execution/datafusion/functions/to_boolean.rs 
b/crates/df-builtins/src/to_boolean.rs similarity index 99% rename from crates/runtime/src/execution/datafusion/functions/to_boolean.rs rename to crates/df-builtins/src/to_boolean.rs index 06e4f0c6c..44de6f868 100644 --- a/crates/runtime/src/execution/datafusion/functions/to_boolean.rs +++ b/crates/df-builtins/src/to_boolean.rs @@ -1,7 +1,7 @@ -use crate::execution::datafusion::functions::array_to_boolean; +use crate::array_to_boolean; +use datafusion::arrow::array::Array; use datafusion::arrow::array::builder::BooleanBuilder; use datafusion::arrow::array::cast::as_string_array; -use datafusion::arrow::array::Array; use datafusion::arrow::datatypes::DataType; use datafusion::error::Result as DFResult; use datafusion::logical_expr::{ColumnarValue, Signature, TypeSignature, Volatility}; diff --git a/crates/runtime/src/execution/datafusion/functions/to_time.rs b/crates/df-builtins/src/to_time.rs similarity index 99% rename from crates/runtime/src/execution/datafusion/functions/to_time.rs rename to crates/df-builtins/src/to_time.rs index 6a3a21b7c..8c104a612 100644 --- a/crates/runtime/src/execution/datafusion/functions/to_time.rs +++ b/crates/df-builtins/src/to_time.rs @@ -9,7 +9,7 @@ use datafusion::arrow::compute::kernels::cast_utils::Parser; use datafusion::arrow::datatypes::{DataType, TimeUnit}; use datafusion::error::Result as DFResult; use datafusion::logical_expr::ColumnarValue; -use datafusion_common::{exec_err, ScalarValue}; +use datafusion_common::{ScalarValue, exec_err}; use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl, Signature, TypeSignature, Volatility}; use std::any::Any; use std::sync::Arc; diff --git a/crates/df-catalog/Cargo.toml b/crates/df-catalog/Cargo.toml new file mode 100644 index 000000000..7c13a87fc --- /dev/null +++ b/crates/df-catalog/Cargo.toml @@ -0,0 +1,32 @@ +[package] +name = "df-catalog" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +core-utils = { path = "../core-utils" } +core-metastore = { path = "../core-metastore" } +async-trait = { workspace = true } +aws-config = { workspace = true } +aws-credential-types = { workspace = true } +dashmap = { workspace = true } +datafusion = { workspace = true } +datafusion-common = { workspace = true } +datafusion-doc = { workspace = true } +datafusion-expr = { workspace = true } +datafusion-physical-plan = { workspace = true } +datafusion_iceberg = { workspace = true } +futures = { workspace = true } + +iceberg-rust = { workspace = true } +iceberg-rust-spec = { workspace = true } +iceberg-s3tables-catalog = { workspace = true } +object_store = { workspace = true } +once_cell = { version = "1.20.2" } +snafu = { workspace = true } +tokio = { workspace = true } +url = { workspace = true } + +[lints] +workspace = true diff --git a/crates/runtime/src/execution/catalog/catalog.rs b/crates/df-catalog/src/catalog.rs similarity index 97% rename from crates/runtime/src/execution/catalog/catalog.rs rename to crates/df-catalog/src/catalog.rs index ae83ed200..1446d5002 100644 --- a/crates/runtime/src/execution/catalog/catalog.rs +++ b/crates/df-catalog/src/catalog.rs @@ -1,4 +1,4 @@ -use crate::execution::catalog::schema::CachingSchema; +use crate::schema::CachingSchema; use dashmap::DashMap; use datafusion::catalog::{CatalogProvider, SchemaProvider}; use std::{any::Any, sync::Arc}; diff --git a/crates/runtime/src/execution/catalog/catalog_list.rs b/crates/df-catalog/src/catalog_list.rs similarity index 86% rename from crates/runtime/src/execution/catalog/catalog_list.rs rename to 
crates/df-catalog/src/catalog_list.rs index 2b24a7999..7e961dcc5 100644 --- a/crates/runtime/src/execution/catalog/catalog_list.rs +++ b/crates/df-catalog/src/catalog_list.rs @@ -1,12 +1,14 @@ use super::catalogs::embucket::catalog::EmbucketCatalog; use super::catalogs::embucket::iceberg_catalog::EmbucketIcebergCatalog; -use crate::execution::catalog::catalog::CachingCatalog; -use crate::execution::catalog::schema::CachingSchema; -use crate::execution::catalog::table::CachingTable; -use crate::execution::error::{self as ex_error, ExecutionError, ExecutionResult}; +use crate::catalog::CachingCatalog; +use crate::error::{DataFusionSnafu, Error, MetastoreSnafu, Result, S3TablesSnafu}; +use crate::schema::CachingSchema; +use crate::table::CachingTable; use aws_config::{BehaviorVersion, Region, SdkConfig}; -use aws_credential_types::provider::SharedCredentialsProvider; use aws_credential_types::Credentials; +use aws_credential_types::provider::SharedCredentialsProvider; +use core_metastore::{AwsCredentials, Metastore, VolumeType as MetastoreVolumeType}; +use core_utils::scan_iterator::ScanIterator; use dashmap::DashMap; use datafusion::{ catalog::{CatalogProvider, CatalogProviderList}, @@ -14,14 +16,10 @@ use datafusion::{ }; use datafusion_common::DataFusionError; use datafusion_iceberg::catalog::catalog::IcebergCatalog as DataFusionIcebergCatalog; -use embucket_metastore::{ - error::MetastoreError, AwsCredentials, Metastore, VolumeType as MetastoreVolumeType, -}; -use embucket_utils::scan_iterator::ScanIterator; use iceberg_rust::object_store::ObjectStoreBuilder; use iceberg_s3tables_catalog::S3TablesCatalog; -use object_store::local::LocalFileSystem; use object_store::ObjectStore; +use object_store::local::LocalFileSystem; use snafu::ResultExt; use std::any::Any; use std::sync::Arc; @@ -62,7 +60,7 @@ impl EmbucketCatalogList { /// - Metastore access (e.g., during database or volume listing) /// - Iceberg or S3 catalog initialization /// - `DataFusion` catalog wrapping or setup failures - pub async fn register_catalogs(&self) -> ExecutionResult<()> { + pub async fn register_catalogs(&self) -> Result<()> { // Internal catalogs let mut catalogs = self.internal_catalogs().await?; @@ -76,19 +74,17 @@ impl EmbucketCatalogList { Ok(()) } - pub async fn internal_catalogs(&self) -> ExecutionResult> { + pub async fn internal_catalogs(&self) -> Result> { self.metastore .iter_databases() .collect() .await - .map_err(|e| ExecutionError::Metastore { - source: MetastoreError::UtilSlateDB { source: e }, - })? + .map_err(|e| Error::Core { source: e })? .into_iter() .map(|db| { let iceberg_catalog = EmbucketIcebergCatalog::new(self.metastore.clone(), db.ident.clone()) - .context(ex_error::MetastoreSnafu)?; + .context(MetastoreSnafu)?; let catalog: Arc = Arc::new(EmbucketCatalog { database: db.ident.clone(), metastore: self.metastore.clone(), @@ -104,15 +100,13 @@ impl EmbucketCatalogList { .collect() } - pub async fn external_catalogs(&self) -> ExecutionResult> { + pub async fn external_catalogs(&self) -> Result> { let volumes = self .metastore .iter_volumes() .collect() .await - .map_err(|e| ExecutionError::Metastore { - source: MetastoreError::UtilSlateDB { source: e }, - })? + .map_err(|e| Error::Core { source: e })? 
.into_iter() .filter_map(|v| match v.volume.clone() { MetastoreVolumeType::S3Tables(s3) => Some(s3), @@ -146,11 +140,11 @@ impl EmbucketCatalogList { volume.arn.as_str(), ObjectStoreBuilder::S3(volume.s3_builder()), ) - .context(ex_error::S3TablesSnafu)?; + .context(S3TablesSnafu)?; let catalog = DataFusionIcebergCatalog::new(Arc::new(catalog), None) .await - .context(ex_error::DataFusionSnafu)?; + .context(DataFusionSnafu)?; catalogs.push(CachingCatalog { catalog: Arc::new(catalog), schemas_cache: DashMap::new(), @@ -162,7 +156,7 @@ impl EmbucketCatalogList { } #[allow(clippy::as_conversions, clippy::too_many_lines)] - pub async fn refresh(&self) -> ExecutionResult<()> { + pub async fn refresh(&self) -> Result<()> { for catalog in self.catalogs.iter_mut() { if catalog.should_refresh { let schemas = catalog.schema_names(); @@ -175,11 +169,8 @@ impl EmbucketCatalogList { }; let tables = schema.schema.table_names(); for table in tables { - if let Some(table_provider) = schema - .schema - .table(&table) - .await - .context(ex_error::DataFusionSnafu)? + if let Some(table_provider) = + schema.schema.table(&table).await.context(DataFusionSnafu)? { schema.tables_cache.insert( table.clone(), diff --git a/crates/runtime/src/execution/catalog/catalogs/embucket/catalog.rs b/crates/df-catalog/src/catalogs/embucket/catalog.rs similarity index 93% rename from crates/runtime/src/execution/catalog/catalogs/embucket/catalog.rs rename to crates/df-catalog/src/catalogs/embucket/catalog.rs index 9770cc77f..a4f6c6b4a 100644 --- a/crates/runtime/src/execution/catalog/catalogs/embucket/catalog.rs +++ b/crates/df-catalog/src/catalogs/embucket/catalog.rs @@ -1,8 +1,8 @@ use super::schema::EmbucketSchema; -use crate::execution::catalog::catalogs::embucket::block_in_new_runtime; +use crate::catalogs::embucket::block_in_new_runtime; +use core_metastore::{Metastore, SchemaIdent}; +use core_utils::scan_iterator::ScanIterator; use datafusion::catalog::{CatalogProvider, SchemaProvider}; -use embucket_metastore::{Metastore, SchemaIdent}; -use embucket_utils::scan_iterator::ScanIterator; use iceberg_rust::catalog::Catalog as IcebergCatalog; use std::{any::Any, sync::Arc}; diff --git a/crates/runtime/src/execution/catalog/catalogs/embucket/iceberg_catalog.rs b/crates/df-catalog/src/catalogs/embucket/iceberg_catalog.rs similarity index 98% rename from crates/runtime/src/execution/catalog/catalogs/embucket/iceberg_catalog.rs rename to crates/df-catalog/src/catalogs/embucket/iceberg_catalog.rs index eb0a3ffc3..a153323c4 100644 --- a/crates/runtime/src/execution/catalog/catalogs/embucket/iceberg_catalog.rs +++ b/crates/df-catalog/src/catalogs/embucket/iceberg_catalog.rs @@ -1,23 +1,23 @@ use std::{collections::HashMap, sync::Arc}; use async_trait::async_trait; -use embucket_metastore::error::{MetastoreError, MetastoreResult}; -use embucket_metastore::{ +use core_metastore::error::{MetastoreError, MetastoreResult}; +use core_metastore::{ Metastore, Schema as MetastoreSchema, SchemaIdent as MetastoreSchemaIdent, TableCreateRequest as MetastoreTableCreateRequest, TableIdent as MetastoreTableIdent, TableUpdate as MetastoreTableUpdate, }; -use embucket_utils::scan_iterator::ScanIterator; +use core_utils::scan_iterator::ScanIterator; use futures::executor::block_on; use iceberg_rust::{ catalog::{ + Catalog as IcebergCatalog, commit::{CommitTable as IcebergCommitTable, CommitView as IcebergCommitView}, create::{ CreateMaterializedView as IcebergCreateMaterializedView, CreateTable as IcebergCreateTable, CreateView as IcebergCreateView, 
        },
        tabular::Tabular as IcebergTabular,
-        Catalog as IcebergCatalog,
    },
    error::Error as IcebergError,
    materialized_view::MaterializedView as IcebergMaterializedView,
@@ -29,7 +29,6 @@ use iceberg_rust_spec::{
     identifier::FullIdentifier as IcebergFullIdentifier, namespace::Namespace as IcebergNamespace,
 };
 use object_store::ObjectStore;
-use snafu::ResultExt;

 #[derive(Debug)]
 pub struct EmbucketIcebergCatalog {
@@ -364,11 +363,12 @@ impl IcebergCatalog for EmbucketIcebergCatalog {
             properties: None,
         };

+        // TODO: restore .context
         let table = self
             .metastore
             .create_table(&ident, table_create_request)
             .await
-            .context(crate::execution::error::MetastoreSnafu)
+            // .context(crate::execution::error::MetastoreSnafu)
             .map_err(|e| IcebergError::External(Box::new(e)))?;
         Ok(IcebergTable::new(
             identifier.clone(),
diff --git a/crates/runtime/src/execution/catalog/catalogs/embucket/mod.rs b/crates/df-catalog/src/catalogs/embucket/mod.rs
similarity index 100%
rename from crates/runtime/src/execution/catalog/catalogs/embucket/mod.rs
rename to crates/df-catalog/src/catalogs/embucket/mod.rs
diff --git a/crates/runtime/src/execution/catalog/catalogs/embucket/schema.rs b/crates/df-catalog/src/catalogs/embucket/schema.rs
similarity index 94%
rename from crates/runtime/src/execution/catalog/catalogs/embucket/schema.rs
rename to crates/df-catalog/src/catalogs/embucket/schema.rs
index 54a2b4bba..3f1d23916 100644
--- a/crates/runtime/src/execution/catalog/catalogs/embucket/schema.rs
+++ b/crates/df-catalog/src/catalogs/embucket/schema.rs
@@ -1,11 +1,11 @@
-use crate::execution::catalog::catalogs::embucket::block_in_new_runtime;
+use crate::catalogs::embucket::block_in_new_runtime;
 use async_trait::async_trait;
+use core_metastore::error::MetastoreError;
+use core_metastore::{Metastore, SchemaIdent, TableIdent};
+use core_utils::scan_iterator::ScanIterator;
 use datafusion::catalog::{SchemaProvider, TableProvider};
 use datafusion_common::DataFusionError;
 use datafusion_iceberg::DataFusionTable as IcebergDataFusionTable;
-use embucket_metastore::error::MetastoreError;
-use embucket_metastore::{Metastore, SchemaIdent, TableIdent};
-use embucket_utils::scan_iterator::ScanIterator;
 use iceberg_rust::catalog::Catalog as IcebergCatalog;
 use iceberg_rust::{catalog::tabular::Tabular as IcebergTabular, table::Table as IcebergTable};
 use std::any::Any;
diff --git a/crates/runtime/src/execution/catalog/catalogs/mod.rs b/crates/df-catalog/src/catalogs/mod.rs
similarity index 100%
rename from crates/runtime/src/execution/catalog/catalogs/mod.rs
rename to crates/df-catalog/src/catalogs/mod.rs
diff --git a/crates/df-catalog/src/error.rs b/crates/df-catalog/src/error.rs
new file mode 100644
index 000000000..6b552d7b4
--- /dev/null
+++ b/crates/df-catalog/src/error.rs
@@ -0,0 +1,23 @@
+use core_metastore::error::MetastoreError;
+use core_utils::Error as CoreError;
+use datafusion_common::DataFusionError;
+use iceberg_s3tables_catalog::error::Error as S3TablesError;
+use snafu::prelude::*;
+
+#[derive(Debug, Snafu)]
+#[snafu(visibility(pub(crate)))]
+pub enum Error {
+    #[snafu(display("Metastore error: {source}"))]
+    Metastore { source: MetastoreError },
+
+    #[snafu(display("Core error: {source}"))]
+    Core { source: CoreError },
+
+    #[snafu(display("DataFusion error: {source}"))]
+    DataFusion { source: DataFusionError },
+
+    #[snafu(display("S3Tables error: {source}"))]
+    S3Tables { source: S3TablesError },
+}
+
+pub type Result<T, E = Error> = std::result::Result<T, E>;
diff --git a/crates/runtime/src/execution/catalog/information_schema/columns.rs
b/crates/df-catalog/src/information_schema/columns.rs similarity index 97% rename from crates/runtime/src/execution/catalog/information_schema/columns.rs rename to crates/df-catalog/src/information_schema/columns.rs index f2fff38e9..2ad1743ab 100644 --- a/crates/runtime/src/execution/catalog/information_schema/columns.rs +++ b/crates/df-catalog/src/information_schema/columns.rs @@ -2,7 +2,11 @@ //! //! [Information Schema Column]: https://docs.snowflake.com/en/sql-reference/info-schema/columns -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; +use DataType::{ + Binary, Decimal128, Float16, Float32, Float64, Int8, Int16, Int32, LargeBinary, LargeUtf8, + UInt8, UInt16, UInt32, UInt64, Utf8, +}; use datafusion::arrow::array::builder::UInt64Builder; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ @@ -12,15 +16,11 @@ use datafusion::arrow::{ }; use datafusion::execution::TaskContext; use datafusion_common::DataFusionError; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; -use DataType::{ - Binary, Decimal128, Float16, Float32, Float64, Int16, Int32, Int8, LargeBinary, LargeUtf8, - UInt16, UInt32, UInt64, UInt8, Utf8, -}; #[derive(Debug)] pub struct InformationSchemaColumns { diff --git a/crates/runtime/src/execution/catalog/information_schema/config.rs b/crates/df-catalog/src/information_schema/config.rs similarity index 98% rename from crates/runtime/src/execution/catalog/information_schema/config.rs rename to crates/df-catalog/src/information_schema/config.rs index 2c1f3ff80..33823dd1e 100644 --- a/crates/runtime/src/execution/catalog/information_schema/config.rs +++ b/crates/df-catalog/src/information_schema/config.rs @@ -2,18 +2,18 @@ use super::columns::InformationSchemaColumnsBuilder; use super::df_settings::InformationSchemaDfSettingsBuilder; use super::information_schema::{INFORMATION_SCHEMA, INFORMATION_SCHEMA_TABLES}; use super::parameters::{ - get_udaf_args_and_return_types, get_udf_args_and_return_types, get_udwf_args_and_return_types, - InformationSchemaParametersBuilder, + InformationSchemaParametersBuilder, get_udaf_args_and_return_types, + get_udf_args_and_return_types, get_udwf_args_and_return_types, }; use super::routines::InformationSchemaRoutinesBuilder; use super::schemata::InformationSchemataBuilder; use super::tables::InformationSchemaTablesBuilder; use super::views::InformationSchemaViewBuilder; -use crate::execution::catalog::information_schema::databases::InformationSchemaDatabasesBuilder; +use crate::information_schema::databases::InformationSchemaDatabasesBuilder; use datafusion::catalog::CatalogProviderList; use datafusion::logical_expr::{Signature, TypeSignature, Volatility}; -use datafusion_common::config::ConfigOptions; use datafusion_common::DataFusionError; +use datafusion_common::config::ConfigOptions; use datafusion_doc::Documentation; use datafusion_expr::{AggregateUDF, ScalarUDF, TableType, WindowUDF}; use std::collections::HashMap; diff --git a/crates/runtime/src/execution/catalog/information_schema/databases.rs b/crates/df-catalog/src/information_schema/databases.rs similarity index 97% rename from crates/runtime/src/execution/catalog/information_schema/databases.rs rename to 
crates/df-catalog/src/information_schema/databases.rs index a46099665..36fd740cb 100644 --- a/crates/runtime/src/execution/catalog/information_schema/databases.rs +++ b/crates/df-catalog/src/information_schema/databases.rs @@ -2,7 +2,7 @@ //! //! [Information Schema Databases]: https://docs.snowflake.com/en/sql-reference/info-schema/databases -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ array::StringBuilder, @@ -11,9 +11,9 @@ use datafusion::arrow::{ }; use datafusion::execution::TaskContext; use datafusion_common::DataFusionError; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/information_schema/df_settings.rs b/crates/df-catalog/src/information_schema/df_settings.rs similarity index 96% rename from crates/runtime/src/execution/catalog/information_schema/df_settings.rs rename to crates/df-catalog/src/information_schema/df_settings.rs index 099c8ec57..19ef57ec4 100644 --- a/crates/runtime/src/execution/catalog/information_schema/df_settings.rs +++ b/crates/df-catalog/src/information_schema/df_settings.rs @@ -1,4 +1,4 @@ -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ array::StringBuilder, @@ -6,11 +6,11 @@ use datafusion::arrow::{ record_batch::RecordBatch, }; use datafusion::execution::TaskContext; -use datafusion_common::config::ConfigEntry; use datafusion_common::DataFusionError; +use datafusion_common::config::ConfigEntry; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/information_schema/information_schema.rs b/crates/df-catalog/src/information_schema/information_schema.rs similarity index 84% rename from crates/runtime/src/execution/catalog/information_schema/information_schema.rs rename to crates/df-catalog/src/information_schema/information_schema.rs index 5cc3b6638..58fce0cd1 100644 --- a/crates/runtime/src/execution/catalog/information_schema/information_schema.rs +++ b/crates/df-catalog/src/information_schema/information_schema.rs @@ -2,21 +2,20 @@ //! //! 
[Information Schema]: https://docs.snowflake.com/en/sql-reference/info-schema -use super::config::InformationSchemaConfig; -use super::schemata::InformationSchemata; -use super::tables::InformationSchemaTables; -use super::views::InformationSchemaViews; - -use crate::execution::catalog::information_schema::columns::InformationSchemaColumns; -use crate::execution::catalog::information_schema::databases::InformationSchemaDatabases; -use crate::execution::catalog::information_schema::df_settings::InformationSchemaDfSettings; -use crate::execution::catalog::information_schema::parameters::InformationSchemaParameters; -use crate::execution::catalog::information_schema::routines::InformationSchemaRoutines; +use crate::information_schema::columns::InformationSchemaColumns; +use crate::information_schema::config::InformationSchemaConfig; +use crate::information_schema::databases::InformationSchemaDatabases; +use crate::information_schema::df_settings::InformationSchemaDfSettings; +use crate::information_schema::parameters::InformationSchemaParameters; +use crate::information_schema::routines::InformationSchemaRoutines; +use crate::information_schema::schemata::InformationSchemata; +use crate::information_schema::tables::InformationSchemaTables; +use crate::information_schema::views::InformationSchemaViews; use async_trait::async_trait; use datafusion::catalog::streaming::StreamingTable; use datafusion::catalog::{CatalogProviderList, SchemaProvider, TableProvider}; -use datafusion_common::error::Result; use datafusion_common::DataFusionError; +use datafusion_common::error::Result; use datafusion_physical_plan::streaming::PartitionStream; use std::fmt::Debug; use std::{any::Any, sync::Arc}; diff --git a/crates/runtime/src/execution/catalog/information_schema/mod.rs b/crates/df-catalog/src/information_schema/mod.rs similarity index 100% rename from crates/runtime/src/execution/catalog/information_schema/mod.rs rename to crates/df-catalog/src/information_schema/mod.rs diff --git a/crates/runtime/src/execution/catalog/information_schema/parameters.rs b/crates/df-catalog/src/information_schema/parameters.rs similarity index 97% rename from crates/runtime/src/execution/catalog/information_schema/parameters.rs rename to crates/df-catalog/src/information_schema/parameters.rs index faf4c6ea3..d4947d2a4 100644 --- a/crates/runtime/src/execution/catalog/information_schema/parameters.rs +++ b/crates/df-catalog/src/information_schema/parameters.rs @@ -1,5 +1,5 @@ -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; -use datafusion::arrow::array::builder::{BooleanBuilder, UInt64Builder, UInt8Builder}; +use crate::information_schema::config::InformationSchemaConfig; +use datafusion::arrow::array::builder::{BooleanBuilder, UInt8Builder, UInt64Builder}; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ array::StringBuilder, @@ -10,9 +10,9 @@ use datafusion::execution::TaskContext; use datafusion::logical_expr::Signature; use datafusion_common::DataFusionError; use datafusion_expr::{AggregateUDF, ScalarUDF, WindowUDF}; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/information_schema/routines.rs b/crates/df-catalog/src/information_schema/routines.rs similarity index 98% rename from 
crates/runtime/src/execution/catalog/information_schema/routines.rs rename to crates/df-catalog/src/information_schema/routines.rs index fc55793ea..268037703 100644 --- a/crates/runtime/src/execution/catalog/information_schema/routines.rs +++ b/crates/df-catalog/src/information_schema/routines.rs @@ -1,4 +1,4 @@ -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; use datafusion::arrow::array::builder::BooleanBuilder; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ @@ -8,9 +8,9 @@ use datafusion::arrow::{ }; use datafusion::execution::TaskContext; use datafusion_common::DataFusionError; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/information_schema/schemata.rs b/crates/df-catalog/src/information_schema/schemata.rs similarity index 98% rename from crates/runtime/src/execution/catalog/information_schema/schemata.rs rename to crates/df-catalog/src/information_schema/schemata.rs index 53b32a7e5..daed7a0fa 100644 --- a/crates/runtime/src/execution/catalog/information_schema/schemata.rs +++ b/crates/df-catalog/src/information_schema/schemata.rs @@ -2,7 +2,7 @@ //! //! [Information Schema Schemata]: https://docs.snowflake.com/en/sql-reference/info-schema/schemata -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ array::StringBuilder, @@ -11,9 +11,9 @@ use datafusion::arrow::{ }; use datafusion::execution::TaskContext; use datafusion_common::DataFusionError; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/information_schema/tables.rs b/crates/df-catalog/src/information_schema/tables.rs similarity index 97% rename from crates/runtime/src/execution/catalog/information_schema/tables.rs rename to crates/df-catalog/src/information_schema/tables.rs index 42294fa00..2d58dd79f 100644 --- a/crates/runtime/src/execution/catalog/information_schema/tables.rs +++ b/crates/df-catalog/src/information_schema/tables.rs @@ -2,7 +2,7 @@ //! //! 
[Information Schema Tables]: https://docs.snowflake.com/en/sql-reference/info-schema/tables -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ array::StringBuilder, @@ -12,9 +12,9 @@ use datafusion::arrow::{ use datafusion::execution::TaskContext; use datafusion_common::DataFusionError; use datafusion_expr::TableType; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/information_schema/views.rs b/crates/df-catalog/src/information_schema/views.rs similarity index 97% rename from crates/runtime/src/execution/catalog/information_schema/views.rs rename to crates/df-catalog/src/information_schema/views.rs index eca03e3ce..870ea6fec 100644 --- a/crates/runtime/src/execution/catalog/information_schema/views.rs +++ b/crates/df-catalog/src/information_schema/views.rs @@ -2,7 +2,7 @@ //! //! [Information Schema Views]: https://docs.snowflake.com/en/sql-reference/info-schema/views -use crate::execution::catalog::information_schema::config::InformationSchemaConfig; +use crate::information_schema::config::InformationSchemaConfig; use datafusion::arrow::error::ArrowError; use datafusion::arrow::{ array::StringBuilder, @@ -11,9 +11,9 @@ use datafusion::arrow::{ }; use datafusion::execution::TaskContext; use datafusion_common::DataFusionError; +use datafusion_physical_plan::SendableRecordBatchStream; use datafusion_physical_plan::stream::RecordBatchStreamAdapter; use datafusion_physical_plan::streaming::PartitionStream; -use datafusion_physical_plan::SendableRecordBatchStream; use std::fmt::Debug; use std::sync::Arc; diff --git a/crates/runtime/src/execution/catalog/mod.rs b/crates/df-catalog/src/lib.rs similarity index 91% rename from crates/runtime/src/execution/catalog/mod.rs rename to crates/df-catalog/src/lib.rs index a294938cc..cbcfeae65 100644 --- a/crates/runtime/src/execution/catalog/mod.rs +++ b/crates/df-catalog/src/lib.rs @@ -2,6 +2,7 @@ pub mod catalog; pub mod catalog_list; pub mod catalogs; +pub mod error; pub mod schema; pub mod table; diff --git a/crates/runtime/src/execution/catalog/schema.rs b/crates/df-catalog/src/schema.rs similarity index 98% rename from crates/runtime/src/execution/catalog/schema.rs rename to crates/df-catalog/src/schema.rs index 57d33e141..4f9156412 100644 --- a/crates/runtime/src/execution/catalog/schema.rs +++ b/crates/df-catalog/src/schema.rs @@ -1,4 +1,4 @@ -use crate::execution::catalog::table::CachingTable; +use crate::table::CachingTable; use async_trait::async_trait; use dashmap::DashMap; use datafusion::catalog::{SchemaProvider, TableProvider}; diff --git a/crates/runtime/src/execution/catalog/table.rs b/crates/df-catalog/src/table.rs similarity index 100% rename from crates/runtime/src/execution/catalog/table.rs rename to crates/df-catalog/src/table.rs diff --git a/crates/embucketd/Cargo.toml b/crates/embucketd/Cargo.toml new file mode 100644 index 000000000..bb7af2830 --- /dev/null +++ b/crates/embucketd/Cargo.toml @@ -0,0 +1,41 @@ +[package] +name = "embucketd" +version = "0.1.0" +edition = "2024" +license-file.workspace = true + +[dependencies] +core-executor = { path = "../core-executor" } +core-metastore = { path 
= "../core-metastore" } +core-utils = { path = "../core-utils" } +core-history = { path = "../core-history" } +api-ui = { path = "../api-ui" } +api-sessions = { path = "../api-sessions" } +api-snowflake-rest = { path = "../api-snowflake-rest" } +api-iceberg-rest = { path = "../api-iceberg-rest" } +api-internal-rest = { path = "../api-internal-rest" } + +axum = { workspace = true } +clap = { version = "4.5.27", features = ["env", "derive"] } +dotenv = "0.15.0" +object_store = { workspace = true } +tracing = { workspace = true } +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +snmalloc-rs = { workspace = true } +time = { workspace = true } +tower = { workspace = true } +tokio = { workspace = true } +slatedb = { workspace = true } +tower-sessions = { workspace = true } +tower-http = { workspace = true } +serde_yaml = { workspace = true } +utoipa = { workspace = true } +utoipa-axum = { workspace = true } +utoipa-swagger-ui = { workspace = true } + +[build-dependencies] +tar = { workspace = true } +tokio = { workspace = true } + +[lints] +workspace = true diff --git a/bin/bucketd/build.rs b/crates/embucketd/build.rs similarity index 100% rename from bin/bucketd/build.rs rename to crates/embucketd/build.rs diff --git a/bin/bucketd/src/cli.rs b/crates/embucketd/src/cli.rs similarity index 96% rename from bin/bucketd/src/cli.rs rename to crates/embucketd/src/cli.rs index f55726ec1..e60e24b2c 100644 --- a/bin/bucketd/src/cli.rs +++ b/crates/embucketd/src/cli.rs @@ -1,7 +1,7 @@ use clap::{Parser, ValueEnum}; use object_store::{ - aws::AmazonS3Builder, aws::S3ConditionalPut, local::LocalFileSystem, memory::InMemory, - ObjectStore, Result as ObjectStoreResult, + ObjectStore, Result as ObjectStoreResult, aws::AmazonS3Builder, aws::S3ConditionalPut, + local::LocalFileSystem, memory::InMemory, }; use std::fs; use std::path::PathBuf; @@ -213,7 +213,9 @@ impl CliOpts { // method resets a secret env pub fn jwt_secret(&self) -> String { - std::env::remove_var("JWT_SECRET"); + unsafe { + std::env::remove_var("JWT_SECRET"); + } self.jwt_secret.clone().unwrap_or_default() } } diff --git a/crates/embucketd/src/main.rs b/crates/embucketd/src/main.rs new file mode 100644 index 000000000..872557d3f --- /dev/null +++ b/crates/embucketd/src/main.rs @@ -0,0 +1,257 @@ +pub(crate) mod cli; + +use api_iceberg_rest::router::create_router as create_iceberg_router; +use api_iceberg_rest::state::Config as IcebergConfig; +use api_iceberg_rest::state::State as IcebergAppState; +use api_internal_rest::router::create_router as create_internal_router; +use api_internal_rest::state::State as InternalAppState; +use api_sessions::{RequestSessionMemory, RequestSessionStore}; +use api_snowflake_rest::router::create_router as create_snowflake_router; +use api_snowflake_rest::state::AppState as SnowflakeAppState; +use api_ui::auth::layer::require_auth; +use api_ui::auth::router::create_router as create_ui_auth_router; +use api_ui::config::AuthConfig as UIAuthConfig; +use api_ui::config::WebConfig as UIWebConfig; +use api_ui::layers::make_cors_middleware; +use api_ui::router::create_router as create_ui_router; +use api_ui::router::ui_open_api_spec; +use api_ui::state::AppState as UIAppState; +use api_ui::web_assets::config::StaticWebConfig; +use api_ui::web_assets::server::run_web_assets_server; +use axum::middleware; +use axum::{ + Json, Router, + routing::{get, post}, +}; +use clap::Parser; +use core_executor::service::CoreExecutionService; +use core_executor::utils::Config as ExecutionConfig; +use 
core_history::RecordingExecutionService; +use core_history::SlateDBWorksheetsStore; +use core_metastore::SlateDBMetastore; +use core_utils::Db; +use dotenv::dotenv; +use object_store::path::Path; +use slatedb::{Db as SlateDb, config::DbOptions}; +use std::fs; +use std::sync::Arc; +use time::Duration; +use tokio::signal; +use tower_http::catch_panic::CatchPanicLayer; +use tower_http::timeout::TimeoutLayer; +use tower_http::trace::TraceLayer; +use tower_sessions::{Expiry, SessionManagerLayer}; +use tracing_subscriber::{layer::SubscriberExt, util::SubscriberInitExt}; +use utoipa::OpenApi; +use utoipa::openapi; +use utoipa_swagger_ui::SwaggerUi; + +#[global_allocator] +static ALLOCATOR: snmalloc_rs::SnMalloc = snmalloc_rs::SnMalloc; + +#[tokio::main] +#[allow( + clippy::expect_used, + clippy::unwrap_used, + clippy::print_stdout, + clippy::too_many_lines +)] +async fn main() { + dotenv().ok(); + + tracing_subscriber::registry() + .with( + tracing_subscriber::EnvFilter::try_from_default_env() + .unwrap_or_else(|_| "embucketd=debug,tower_http=debug".into()), + ) + .with(tracing_subscriber::fmt::layer()) + .init(); + + let opts = cli::CliOpts::parse(); + let slatedb_prefix = opts.slatedb_prefix.clone(); + let data_format = opts + .data_format + .clone() + .unwrap_or_else(|| "json".to_string()); + let execution_cfg = + ExecutionConfig::new(&data_format).expect("Failed to create execution config"); + let mut auth_config = UIAuthConfig::new(opts.jwt_secret()); + auth_config.with_demo_credentials( + opts.auth_demo_user.clone().unwrap(), + opts.auth_demo_password.clone().unwrap(), + ); + let web_config = UIWebConfig { + host: opts.host.clone().unwrap(), + port: opts.port.unwrap(), + allow_origin: opts.cors_allow_origin.clone(), + }; + let iceberg_config = IcebergConfig { + iceberg_catalog_url: opts.catalog_url.clone().unwrap(), + }; + let static_web_config = StaticWebConfig { + host: web_config.host.clone(), + port: opts.assets_port.unwrap(), + }; + + let object_store = opts + .object_store_backend() + .expect("Failed to create object store"); + let db = Db::new(Arc::new( + SlateDb::open_with_opts( + Path::from(slatedb_prefix), + DbOptions::default(), + object_store.clone(), + ) + .await + .expect("Failed to start Slate DB"), + )); + + let metastore = Arc::new(SlateDBMetastore::new(db.clone())); + let history_store = Arc::new(SlateDBWorksheetsStore::new(db.clone())); + + let execution_svc = Arc::new(CoreExecutionService::new(metastore.clone(), execution_cfg)); + let execution_svc = Arc::new(RecordingExecutionService::new( + execution_svc, + history_store.clone(), + )); + + let session_memory = RequestSessionMemory::default(); + let session_store = RequestSessionStore::new(session_memory, execution_svc.clone()); + + tokio::task::spawn( + session_store + .clone() + .continuously_delete_expired(tokio::time::Duration::from_secs(60)), + ); + + let session_layer = SessionManagerLayer::new(session_store) + .with_secure(false) + .with_expiry(Expiry::OnInactivity(Duration::seconds(5 * 60))); + + let internal_router = + create_internal_router().with_state(InternalAppState::new(metastore.clone())); + let ui_state = UIAppState::new( + metastore.clone(), + history_store, + execution_svc.clone(), + Arc::new(web_config.clone()), + Arc::new(auth_config), + ); + let ui_router = create_ui_router().with_state(ui_state.clone()); + let ui_router = ui_router.layer(middleware::from_fn_with_state( + ui_state.clone(), + require_auth, + )); + let ui_auth_router = create_ui_auth_router().with_state(ui_state.clone()); + let 
snowflake_router =
+        create_snowflake_router().with_state(SnowflakeAppState { execution_svc });
+    let iceberg_router = create_iceberg_router().with_state(IcebergAppState {
+        metastore,
+        config: Arc::new(iceberg_config),
+    });
+
+    // --- OpenAPI specs ---
+    let mut spec = ApiDoc::openapi();
+    if let Some(extra_spec) = load_openapi_spec() {
+        spec = spec.merge_from(extra_spec);
+    }
+
+    let ui_spec = ui_open_api_spec();
+
+    let ui_router = Router::new()
+        .nest("/ui", ui_router)
+        .nest("/ui/auth", ui_auth_router);
+    let ui_router = match web_config.allow_origin {
+        Some(allow_origin) => ui_router.layer(make_cors_middleware(&allow_origin)),
+        None => ui_router,
+    };
+
+    let router = Router::new()
+        .merge(ui_router)
+        .nest("/v1/metastore", internal_router)
+        .nest("/v1", snowflake_router)
+        .nest("/catalog", iceberg_router)
+        .merge(
+            SwaggerUi::new("/")
+                .url("/openapi.json", spec)
+                .url("/ui_openapi.json", ui_spec),
+        )
+        .route("/health", get(|| async { Json("OK") }))
+        .route("/telemetry/send", post(|| async { Json("OK") }))
+        .layer(session_layer)
+        .layer(TraceLayer::new_for_http())
+        .layer(TimeoutLayer::new(std::time::Duration::from_secs(1200)))
+        .layer(CatchPanicLayer::new());
+
+    // Run the static assets server in the background
+    run_web_assets_server(&static_web_config)
+        .await
+        .expect("Failed to start static assets server");
+
+    let host = web_config.host.clone();
+    let port = web_config.port;
+    let listener = tokio::net::TcpListener::bind(format!("{host}:{port}"))
+        .await
+        .expect("Failed to bind to address");
+    let addr = listener.local_addr().expect("Failed to get local address");
+    tracing::info!("Listening on http://{}", addr);
+    axum::serve(listener, router)
+        .with_graceful_shutdown(shutdown_signal(Arc::new(db.clone())))
+        .await
+        .expect("Failed to start server");
+}
+
+/// Waits for a shutdown signal: either Ctrl+C or, on Unix, SIGTERM.
+///
+/// # Panics
+/// Panics if a signal handler cannot be installed.
+#[allow(
+    clippy::expect_used,
+    clippy::redundant_pub_crate,
+    clippy::cognitive_complexity
+)]
+async fn shutdown_signal(db: Arc<Db>) {
+    let ctrl_c = async {
+        signal::ctrl_c()
+            .await
+            .expect("failed to install Ctrl+C handler");
+    };
+
+    #[cfg(unix)]
+    let terminate = async {
+        signal::unix::signal(signal::unix::SignalKind::terminate())
+            .expect("failed to install signal handler")
+            .recv()
+            .await;
+    };
+
+    #[cfg(not(unix))]
+    let terminate = std::future::pending::<()>();
+
+    tokio::select! {
+        () = ctrl_c => {
+            db.close().await.expect("Failed to close database");
+            tracing::warn!("Ctrl+C received, starting graceful shutdown");
+        },
+        () = terminate => {
+            db.close().await.expect("Failed to close database");
+            tracing::warn!("SIGTERM received, starting graceful shutdown");
+        },
+    }
+
+    tracing::warn!("signal received, starting graceful shutdown");
+}
+
+// TODO: Fix OpenAPI spec generation
+#[derive(OpenApi)]
+#[openapi()]
+pub struct ApiDoc;
+
+fn load_openapi_spec() -> Option<openapi::OpenApi> {
+    let openapi_yaml_content = fs::read_to_string("rest-catalog-open-api.yaml").ok()?;
+    let mut original_spec =
+        serde_yaml::from_str::<openapi::OpenApi>(&openapi_yaml_content).ok()?;
+    // Dropping all paths from the original spec
+    original_spec.paths = openapi::Paths::new();
+    Some(original_spec)
+}
diff --git a/crates/runtime/Cargo.toml b/crates/runtime/Cargo.toml
index 51514a0a8..31d57b8ab 100644
--- a/crates/runtime/Cargo.toml
+++ b/crates/runtime/Cargo.toml
@@ -100,7 +100,7 @@ tower-http = { version = "0.6.1", features = [
 tower-sessions = { version = "0.14.0" }
 tracing = { workspace = true }
 tracing-attributes = { workspace = true }
-url = { version = "2.5.4" }
+url = { workspace = true }
 utoipa = { workspace = true }
 utoipa-axum = { workspace = true }
diff --git a/crates/runtime/src/execution/datafusion/visitors/mod.rs b/crates/runtime/src/execution/datafusion/visitors/mod.rs
deleted file mode 100644
index b6b557d30..000000000
--- a/crates/runtime/src/execution/datafusion/visitors/mod.rs
+++ /dev/null
@@ -1,21 +0,0 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements. See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership. The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License. You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied. See the License for the
-// specific language governing permissions and limitations
-// under the License.
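
A detail of the router assembly in `main` above that is easy to miss: in axum, each `.layer(...)` call wraps everything registered before it, so the layer attached last sits outermost on the request path. On that reading, `CatchPanicLayer` surrounds the timeout, trace, and session layers. A minimal sketch of the same ordering; only the `/health` route and the 1200-second timeout are taken from the code above, and the corresponding tower-http features are assumed to be enabled:

use axum::{Json, Router, routing::get};
use tower_http::{catch_panic::CatchPanicLayer, timeout::TimeoutLayer, trace::TraceLayer};

fn app() -> Router {
    Router::new()
        // Handlers are innermost; each layer below wraps everything above it.
        .route("/health", get(|| async { Json("OK") }))
        .layer(TraceLayer::new_for_http())
        .layer(TimeoutLayer::new(std::time::Duration::from_secs(1200)))
        // Attached last, so it runs outermost and catches panics from
        // the handlers and the layers inside it.
        .layer(CatchPanicLayer::new())
}

Order matters here: if `CatchPanicLayer` were attached first, it would sit innermost and a panic raised inside one of the other layers would bypass it.
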
- -//pub mod analyzer; -//pub mod error; -pub mod functions_rewriter; -pub mod json_element; diff --git a/crates/runtime/src/execution/recording_service.rs b/crates/runtime/src/execution/recording_service.rs deleted file mode 100644 index f5a2e8761..000000000 --- a/crates/runtime/src/execution/recording_service.rs +++ /dev/null @@ -1,96 +0,0 @@ -use crate::execution::error::ExecutionResult; -use crate::execution::models::ColumnInfo; -use crate::execution::query::QueryContext; -use crate::execution::service::ExecutionService; -use crate::execution::session::UserSession; -use crate::execution::utils::Config; -use crate::http::ui::queries::models::ResultSet; -use bytes::Bytes; -use datafusion::arrow::array::RecordBatch; -use datafusion::arrow::csv::reader::Format; -use embucket_history::{QueryRecord, QueryRecordActions, WorksheetsStore}; -use embucket_metastore::TableIdent as MetastoreTableIdent; -use std::sync::Arc; - -pub struct RecordingExecutionService { - pub execution: Arc, - pub store: Arc, -} - -//TODO: add tests -impl RecordingExecutionService { - pub fn new(execution: Arc, store: Arc) -> Self { - Self { execution, store } - } -} - -#[async_trait::async_trait] -impl ExecutionService for RecordingExecutionService { - async fn create_session(&self, session_id: String) -> ExecutionResult> { - self.execution.create_session(session_id).await - } - - async fn delete_session(&self, session_id: String) -> ExecutionResult<()> { - self.execution.delete_session(session_id).await - } - - async fn query( - &self, - session_id: &str, - query: &str, - query_context: QueryContext, - ) -> ExecutionResult<(Vec, Vec)> { - let mut query_record = QueryRecord::query_start(query, query_context.worksheet_id); - let query_res = self.execution.query(session_id, query, query_context).await; - match query_res { - Ok((ref records, ref columns)) => { - let result_set = ResultSet::query_result_to_result_set(records, columns); - match result_set { - Ok(result_set) => { - let encoded_res = serde_json::to_string(&result_set); - - if let Ok(encoded_res) = encoded_res { - let result_count = i64::try_from(records.len()).unwrap_or(0); - query_record.query_finished(result_count, Some(encoded_res)); - } - // failed to wrap query results - else if let Err(err) = encoded_res { - query_record.query_finished_with_error(err.to_string()); - } - } - // error getting result_set - Err(err) => { - query_record.query_finished_with_error(err.to_string()); - } - } - } - // query error - Err(ref err) => { - // query execution error - query_record.query_finished_with_error(err.to_string()); - } - } - // add query record - if let Err(err) = self.store.add_query(&query_record).await { - // do not raise error, just log ? 
- tracing::error!("{err}"); - } - query_res - } - async fn upload_data_to_table( - &self, - session_id: &str, - table_ident: &MetastoreTableIdent, - data: Bytes, - file_name: &str, - format: Format, - ) -> ExecutionResult { - self.execution - .upload_data_to_table(session_id, table_ident, data, file_name, format) - .await - } - - fn config(&self) -> &Config { - self.execution.config() - } -} diff --git a/crates/runtime/src/http/config.rs b/crates/runtime/src/http/config.rs deleted file mode 100644 index 948f539ed..000000000 --- a/crates/runtime/src/http/config.rs +++ /dev/null @@ -1,10 +0,0 @@ -use serde::{Deserialize, Serialize}; - -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct WebConfig { - pub host: String, - pub port: u16, - pub allow_origin: Option, - pub data_format: String, - pub iceberg_catalog_url: String, -} diff --git a/crates/runtime/src/http/dbt/error.rs b/crates/runtime/src/http/dbt/error.rs deleted file mode 100644 index 88ec2ac8b..000000000 --- a/crates/runtime/src/http/dbt/error.rs +++ /dev/null @@ -1,200 +0,0 @@ -use axum::{http, response::IntoResponse, Json}; -use snafu::prelude::*; - -use super::schemas::JsonResponse; -use crate::execution::error::ExecutionError; -use datafusion::arrow::error::ArrowError; - -#[derive(Snafu, Debug)] -#[snafu(visibility(pub(crate)))] -pub enum DbtError { - #[snafu(display("Failed to decompress GZip body"))] - GZipDecompress { source: std::io::Error }, - - #[snafu(display("Failed to parse login request"))] - LoginRequestParse { source: serde_json::Error }, - - #[snafu(display("Failed to parse query body"))] - QueryBodyParse { source: serde_json::Error }, - - #[snafu(display("Missing auth token"))] - MissingAuthToken, - - #[snafu(display("Invalid warehouse_id format"))] - InvalidWarehouseIdFormat { source: uuid::Error }, - - #[snafu(display("Missing DBT session"))] - MissingDbtSession, - - #[snafu(display("Invalid auth data"))] - InvalidAuthData, - - #[snafu(display("Feature not implemented"))] - NotImplemented, - - #[snafu(display("Failed to parse row JSON"))] - RowParse { source: serde_json::Error }, - - #[snafu(display("UTF8 error: {source}"))] - Utf8 { source: std::string::FromUtf8Error }, - - #[snafu(display("Arrow error: {source}"))] - Arrow { source: ArrowError }, - - #[snafu(transparent)] - Metastore { - source: crate::http::metastore::error::MetastoreAPIError, - }, - - #[snafu(transparent)] - Execution { - source: crate::execution::error::ExecutionError, - }, -} - -pub type DbtResult = std::result::Result; - -impl IntoResponse for DbtError { - fn into_response(self) -> axum::response::Response { - if let Self::Execution { source } = self { - return source.into_response(); - } - if let Self::Metastore { source } = self { - return source.into_response(); - } - - let status_code = match &self { - Self::GZipDecompress { .. } - | Self::LoginRequestParse { .. } - | Self::QueryBodyParse { .. } - | Self::InvalidWarehouseIdFormat { .. } => http::StatusCode::BAD_REQUEST, - Self::RowParse { .. } - | Self::Utf8 { .. } - | Self::Arrow { .. } - | Self::Metastore { .. } - | Self::Execution { .. } - | Self::NotImplemented { .. 
} => http::StatusCode::OK, - Self::MissingAuthToken | Self::MissingDbtSession | Self::InvalidAuthData => { - http::StatusCode::UNAUTHORIZED - } - }; - - let message = match &self { - Self::GZipDecompress { source } => format!("failed to decompress GZip body: {source}"), - Self::LoginRequestParse { source } => { - format!("failed to parse login request: {source}") - } - Self::QueryBodyParse { source } => format!("failed to parse query body: {source}"), - Self::InvalidWarehouseIdFormat { source } => format!("invalid warehouse_id: {source}"), - Self::RowParse { source } => format!("failed to parse row JSON: {source}"), - Self::MissingAuthToken | Self::MissingDbtSession | Self::InvalidAuthData => { - "session error".to_string() - } - Self::Utf8 { source } => { - format!("Error encoding UTF8 string: {source}") - } - Self::Arrow { source } => { - format!("Error encoding in Arrow format: {source}") - } - Self::NotImplemented => "feature not implemented".to_string(), - Self::Metastore { source } => source.to_string(), - Self::Execution { source } => source.to_string(), - }; - - let body = Json(JsonResponse { - success: false, - message: Some(message), - // TODO: On error data field contains details about actual error - // {'data': {'internalError': False, 'unredactedFromSecureObject': False, 'errorCode': '002003', 'age': 0, 'sqlState': '02000', 'queryId': '01bb407f-0002-97af-0004-d66e006a69fa', 'line': 1, 'pos': 14, 'type': 'COMPILATION'}} - data: None, - code: Some(status_code.as_u16().to_string()), - }); - (status_code, body).into_response() - } -} - -impl IntoResponse for ExecutionError { - fn into_response(self) -> axum::response::Response { - let status_code = match &self { - Self::RegisterUDF { .. } - | Self::RegisterUDAF { .. } - | Self::InvalidTableIdentifier { .. } - | Self::InvalidSchemaIdentifier { .. } - | Self::InvalidFilePath { .. } - | Self::InvalidBucketIdentifier { .. } - | Self::TableProviderNotFound { .. } - | Self::MissingDataFusionSession { .. } - | Self::Utf8 { .. } - | Self::VolumeNotFound { .. } - | Self::ObjectStore { .. } - | Self::ObjectAlreadyExists { .. } - | Self::UnsupportedFileFormat { .. } - | Self::RefreshCatalogList { .. } - | Self::UrlParse { .. } - | Self::JobError { .. } - | Self::UploadFailed { .. } => http::StatusCode::BAD_REQUEST, - Self::Arrow { .. } - | Self::S3Tables { .. } - | Self::Iceberg { .. } - | Self::CatalogListDowncast { .. } - | Self::CatalogDownCast { .. } - | Self::RegisterCatalog { .. } => http::StatusCode::INTERNAL_SERVER_ERROR, - Self::DatabaseNotFound { .. } - | Self::TableNotFound { .. } - | Self::SchemaNotFound { .. } - | Self::CatalogNotFound { .. } - | Self::Metastore { .. } - | Self::DataFusion { .. } - | Self::DataFusionQuery { .. 
} => http::StatusCode::OK, - }; - - let message = match &self { - Self::DataFusion { source } => format!("DataFusion error: {source}"), - Self::DataFusionQuery { source, query } => { - format!("DataFusion error: {source}, query: {query}") - } - Self::InvalidTableIdentifier { ident } => { - format!("Invalid table identifier: {ident}") - } - Self::InvalidSchemaIdentifier { ident } => { - format!("Invalid schema identifier: {ident}") - } - Self::InvalidFilePath { path } => format!("Invalid file path: {path}"), - Self::InvalidBucketIdentifier { ident } => { - format!("Invalid bucket identifier: {ident}") - } - Self::Arrow { source } => format!("Arrow error: {source}"), - Self::TableProviderNotFound { table_name } => { - format!("No Table Provider found for table: {table_name}") - } - Self::MissingDataFusionSession { id } => { - format!("Missing DataFusion session for id: {id}") - } - Self::Utf8 { source } => format!("Error encoding UTF8 string: {source}"), - Self::Metastore { source } => format!("Metastore error: {source}"), - Self::DatabaseNotFound { db } => format!("Database not found: {db}"), - Self::TableNotFound { table } => format!("Table not found: {table}"), - Self::SchemaNotFound { schema } => format!("Schema not found: {schema}"), - Self::VolumeNotFound { volume } => format!("Volume not found: {volume}"), - Self::ObjectStore { source } => format!("Object store error: {source}"), - Self::ObjectAlreadyExists { type_name, name } => { - format!("Object of type {type_name} with name {name} already exists") - } - Self::UnsupportedFileFormat { format } => { - format!("Unsupported file format {format}") - } - Self::RefreshCatalogList { message } => message.clone(), - _ => "Internal server error".to_string(), - }; - - let body = Json(JsonResponse { - success: false, - message: Some(message), - // TODO: On error data field contains details about actual error - // {'data': {'internalError': False, 'unredactedFromSecureObject': False, 'errorCode': '002003', 'age': 0, 'sqlState': '02000', 'queryId': '01bb407f-0002-97af-0004-d66e006a69fa', 'line': 1, 'pos': 14, 'type': 'COMPILATION'}} - data: None, - code: Some(status_code.as_u16().to_string()), - }); - (status_code, body).into_response() - } -} diff --git a/crates/runtime/src/http/error.rs b/crates/runtime/src/http/error.rs deleted file mode 100644 index c28dc1a60..000000000 --- a/crates/runtime/src/http/error.rs +++ /dev/null @@ -1,45 +0,0 @@ -use axum::{response::IntoResponse, response::Response}; -use serde::{Deserialize, Serialize}; -use snafu::prelude::*; - -#[derive(Debug, Snafu)] -#[snafu(visibility(pub(crate)))] -pub enum RuntimeHttpError { - #[snafu(transparent)] - Metastore { - source: crate::http::metastore::error::MetastoreAPIError, - }, - #[snafu(transparent)] - Dbt { - source: crate::http::dbt::error::DbtError, - }, - #[snafu(transparent)] - UI { - source: crate::http::ui::error::UIError, - }, -} - -impl IntoResponse for RuntimeHttpError { - fn into_response(self) -> Response { - match self { - Self::Metastore { source } => source.into_response(), - Self::Dbt { source } => source.into_response(), - Self::UI { source } => source.into_response(), - } - } -} - -//pub struct RuntimeHttpResult(pub T); -pub type RuntimeHttpResult = Result; - -#[derive(Debug, Serialize, Deserialize, utoipa::ToSchema)] -pub struct ErrorResponse { - pub message: String, - pub status_code: u16, -} - -impl std::fmt::Display for ErrorResponse { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "ErrorResponse(\"{}\")", 
self.message) - } -} diff --git a/crates/runtime/src/http/metastore/handlers.rs b/crates/runtime/src/http/metastore/handlers.rs deleted file mode 100644 index 851ac41ed..000000000 --- a/crates/runtime/src/http/metastore/handlers.rs +++ /dev/null @@ -1,345 +0,0 @@ -use super::error::{MetastoreAPIError, MetastoreAPIResult}; -use axum::{ - extract::{Path, Query, State}, - Json, -}; -use snafu::ResultExt; - -#[allow(clippy::wildcard_imports)] -use embucket_metastore::{ - error::{self as metastore_error, MetastoreError}, - *, -}; - -use crate::http::state::AppState; -use embucket_utils::scan_iterator::ScanIterator; -use validator::Validate; - -pub type RwObjectVec = Vec>; - -#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] -pub struct QueryParameters { - #[serde(default)] - pub cascade: Option, -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn list_volumes( - State(state): State, -) -> MetastoreAPIResult>> { - let volumes = state - .metastore - .iter_volumes() - .collect() - .await - .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e }))? - .iter() - .map(|v| hide_sensitive(v.clone())) - .collect(); - Ok(Json(volumes)) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn get_volume( - State(state): State, - Path(volume_name): Path, -) -> MetastoreAPIResult>> { - match state.metastore.get_volume(&volume_name).await { - Ok(Some(volume)) => Ok(Json(hide_sensitive(volume))), - Ok(None) => Err(MetastoreError::VolumeNotFound { - volume: volume_name.clone(), - } - .into()), - Err(e) => Err(e.into()), - } -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn create_volume( - State(state): State, - Json(volume): Json, -) -> MetastoreAPIResult>> { - volume - .validate() - .context(metastore_error::ValidationSnafu)?; - state - .metastore - .create_volume(&volume.ident.clone(), volume) - .await - .map_err(MetastoreAPIError) - .map(|v| Json(hide_sensitive(v))) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn update_volume( - State(state): State, - Path(volume_name): Path, - Json(volume): Json, -) -> MetastoreAPIResult>> { - volume - .validate() - .context(metastore_error::ValidationSnafu)?; - state - .metastore - .update_volume(&volume_name, volume) - .await - .map_err(MetastoreAPIError) - .map(|v| Json(hide_sensitive(v))) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn delete_volume( - State(state): State, - Query(query): Query, - Path(volume_name): Path, -) -> MetastoreAPIResult<()> { - state - .metastore - .delete_volume(&volume_name, query.cascade.unwrap_or_default()) - .await - .map_err(MetastoreAPIError) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn list_databases( - State(state): State, -) -> MetastoreAPIResult>>> { - state - .metastore - .iter_databases() - .collect() - .await - .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e })) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn get_database( - State(state): State, - Path(database_name): Path, -) -> MetastoreAPIResult>> { - match state.metastore.get_database(&database_name).await { - Ok(Some(db)) => Ok(Json(db)), - Ok(None) => 
Err(MetastoreError::DatabaseNotFound { - db: database_name.clone(), - } - .into()), - Err(e) => Err(e.into()), - } -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn create_database( - State(state): State, - Json(database): Json, -) -> MetastoreAPIResult>> { - database - .validate() - .context(metastore_error::ValidationSnafu)?; - state - .metastore - .create_database(&database.ident.clone(), database) - .await - .map_err(MetastoreAPIError) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn update_database( - State(state): State, - Path(database_name): Path, - Json(database): Json, -) -> MetastoreAPIResult>> { - database - .validate() - .context(metastore_error::ValidationSnafu)?; - //TODO: Implement database renames - state - .metastore - .update_database(&database_name, database) - .await - .map_err(MetastoreAPIError) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn delete_database( - State(state): State, - Query(query): Query, - Path(database_name): Path, -) -> MetastoreAPIResult<()> { - state - .metastore - .delete_database(&database_name, query.cascade.unwrap_or_default()) - .await - .map_err(MetastoreAPIError) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn list_schemas( - State(state): State, - Path(database_name): Path, -) -> MetastoreAPIResult>>> { - state - .metastore - .iter_schemas(&database_name) - .collect() - .await - .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e })) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn get_schema( - State(state): State, - Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult>> { - let schema_ident = SchemaIdent { - database: database_name.clone(), - schema: schema_name.clone(), - }; - match state.metastore.get_schema(&schema_ident).await { - Ok(Some(schema)) => Ok(Json(schema)), - Ok(None) => Err(MetastoreError::SchemaNotFound { - db: database_name.clone(), - schema: schema_name.clone(), - } - .into()), - Err(e) => Err(e.into()), - } -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn create_schema( - State(state): State, - Path(database_name): Path, - Json(schema): Json, -) -> MetastoreAPIResult>> { - state - .metastore - .create_schema(&schema.ident.clone(), schema) - .await - .map_err(MetastoreAPIError) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn update_schema( - State(state): State, - Path((database_name, schema_name)): Path<(String, String)>, - Json(schema): Json, -) -> MetastoreAPIResult>> { - let schema_ident = SchemaIdent::new(database_name, schema_name); - // TODO: Implement schema renames - state - .metastore - .update_schema(&schema_ident, schema) - .await - .map_err(MetastoreAPIError) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn delete_schema( - State(state): State, - Query(query): Query, - Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult<()> { - let schema_ident = SchemaIdent::new(database_name, schema_name); - state - .metastore - .delete_schema(&schema_ident, 
query.cascade.unwrap_or_default()) - .await - .map_err(MetastoreAPIError) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn list_tables( - State(state): State, - Path((database_name, schema_name)): Path<(String, String)>, -) -> MetastoreAPIResult>>> { - let schema_ident = SchemaIdent::new(database_name, schema_name); - state - .metastore - .iter_tables(&schema_ident) - .collect() - .await - .map_err(|e| MetastoreAPIError(MetastoreError::UtilSlateDB { source: e })) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn get_table( - State(state): State, - Path((database_name, schema_name, table_name)): Path<(String, String, String)>, -) -> MetastoreAPIResult>> { - let table_ident = TableIdent::new(&database_name, &schema_name, &table_name); - match state.metastore.get_table(&table_ident).await { - Ok(Some(table)) => Ok(Json(table)), - Ok(None) => Err(MetastoreError::TableNotFound { - db: database_name.clone(), - schema: schema_name.clone(), - table: table_name.clone(), - } - .into()), - Err(e) => Err(e.into()), - } -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn create_table( - State(state): State, - Path((database_name, schema_name)): Path<(String, String)>, - Json(table): Json, -) -> MetastoreAPIResult>> { - table.validate().context(metastore_error::ValidationSnafu)?; - let table_ident = TableIdent::new(&database_name, &schema_name, &table.ident.table); - state - .metastore - .create_table(&table_ident, table) - .await - .map_err(MetastoreAPIError) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn update_table( - State(state): State, - Path((database_name, schema_name, table_name)): Path<(String, String, String)>, - Json(table): Json, -) -> MetastoreAPIResult>> { - let table_ident = TableIdent::new(&database_name, &schema_name, &table_name); - state - .metastore - .update_table(&table_ident, table) - .await - .map_err(MetastoreAPIError) - .map(Json) -} - -#[tracing::instrument(level = "debug", skip(state), err, ret(level = tracing::Level::TRACE))] -pub async fn delete_table( - State(state): State, - Query(query): Query, - Path((database_name, schema_name, table_name)): Path<(String, String, String)>, -) -> MetastoreAPIResult<()> { - let table_ident = TableIdent::new(&database_name, &schema_name, &table_name); - state - .metastore - .delete_table(&table_ident, query.cascade.unwrap_or_default()) - .await - .map_err(MetastoreAPIError) -} - -#[allow(clippy::needless_pass_by_value)] -#[must_use] -pub fn hide_sensitive(volume: RwObject) -> RwObject { - let mut new_volume = volume; - if let VolumeType::S3(ref mut s3_volume) = new_volume.data.volume { - if let Some(AwsCredentials::AccessKey(ref mut access_key)) = s3_volume.credentials { - access_key.aws_access_key_id = "******".to_string(); - access_key.aws_secret_access_key = "******".to_string(); - } - } - new_volume -} diff --git a/crates/runtime/src/http/metastore/models.rs b/crates/runtime/src/http/metastore/models.rs deleted file mode 100644 index 0fb8d683c..000000000 --- a/crates/runtime/src/http/metastore/models.rs +++ /dev/null @@ -1,33 +0,0 @@ -use embucket_metastore::models::*; -use serde::{Deserialize, Serialize}; -use utoipa::ToSchema; - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct HTTPRwObject(pub RwObject); - -impl From> for HTTPRwObject { - fn 
-    fn from(rw_object: RwObject<T>) -> Self {
-        Self(rw_object)
-    }
-}
-
-impl<T> From<HTTPRwObject<T>> for RwObject<T> {
-    fn from(http_rw_object: HTTPRwObject<T>) -> Self {
-        http_rw_object.0
-    }
-}
-
-#[derive(Debug, Clone, PartialEq, Eq)]
-pub struct HTTPRwObjectVec<T>(pub Vec<HTTPRwObject<T>>);
-
-impl<T> From<Vec<RwObject<T>>> for HTTPRwObjectVec<T> {
-    fn from(rw_objects: Vec<RwObject<T>>) -> Self {
-        Self(rw_objects.into_iter().map(HTTPRwObject::from).collect())
-    }
-}
-
-impl<T> From<HTTPRwObjectVec<T>> for Vec<RwObject<T>> {
-    fn from(http_rw_objects: HTTPRwObjectVec<T>) -> Self {
-        http_rw_objects.0.into_iter().map(RwObject::from).collect()
-    }
-}
diff --git a/crates/runtime/src/http/metastore/router.rs b/crates/runtime/src/http/metastore/router.rs
deleted file mode 100644
index 2b6fc6004..000000000
--- a/crates/runtime/src/http/metastore/router.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-use crate::http::state::AppState;
-use axum::routing::{delete, get, post, put};
-use axum::Router;
-
-#[allow(clippy::wildcard_imports)]
-use crate::http::metastore::handlers::*;
-
-pub fn create_router() -> Router<AppState> {
-    let metastore_router = Router::new()
-        .route("/volumes", get(list_volumes))
-        .route("/volumes", post(create_volume))
-        .route("/volumes/{volumeName}", get(get_volume))
-        .route("/volumes/{volumeName}", put(update_volume))
-        .route("/volumes/{volumeName}", delete(delete_volume))
-        .route("/databases", get(list_databases))
-        .route("/databases", post(create_database))
-        .route("/databases/{databaseName}", get(get_database))
-        .route("/databases/{databaseName}", put(update_database))
-        .route("/databases/{databaseName}", delete(delete_database))
-        .route("/databases/{databaseName}/schemas", get(list_schemas))
-        .route("/databases/{databaseName}/schemas", post(create_schema))
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}",
-            get(get_schema),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}",
-            put(update_schema),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}",
-            delete(delete_schema),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}/tables",
-            get(list_tables),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}/tables",
-            post(create_table),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}/tables/{tableName}",
-            get(get_table),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}/tables/{tableName}",
-            put(update_table),
-        )
-        .route(
-            "/databases/{databaseName}/schemas/{schemaName}/tables/{tableName}",
-            delete(delete_table),
-        );
-
-    Router::new().nest("/v1/metastore", metastore_router)
-}
diff --git a/crates/runtime/src/http/mod.rs b/crates/runtime/src/http/mod.rs
deleted file mode 100644
index 09fa10b27..000000000
--- a/crates/runtime/src/http/mod.rs
+++ /dev/null
@@ -1,200 +0,0 @@
-use axum::{
-    body::{Body, Bytes},
-    extract::Request,
-    http::StatusCode,
-    middleware::{self, Next},
-    response::{IntoResponse, Response},
-    Router,
-};
-use embucket_history::store::SlateDBWorksheetsStore;
-use embucket_metastore::SlateDBMetastore;
-use http_body_util::BodyExt;
-use std::sync::Arc;
-use time::Duration;
-use tokio::signal;
-use tower_http::trace::TraceLayer;
-use tower_sessions::{Expiry, SessionManagerLayer};
-
-use layers::make_cors_middleware;
-use session::{RequestSessionMemory, RequestSessionStore};
-
-use crate::execution::recording_service::RecordingExecutionService;
-use crate::execution::{self, service::CoreExecutionService};
-use embucket_utils::Db;
-
-pub mod error;
-
-pub mod auth;
-pub mod catalog;
-pub mod dbt;
-pub mod metastore;
-pub mod ui;
-pub mod web_assets;
-
-pub mod config;
-pub mod layers;
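
The removed metastore router above is worth distilling, since the same nesting pattern reappears in the new api-* crates: sub-routers are built against a shared state type, grouped under a common prefix with nest, and only become serveable once with_state is applied. A minimal sketch, assuming axum 0.8 and its {param} path syntax; AppState and the handler here are stubs standing in for the real types, not the project's actual code:

use axum::{extract::Path, routing::get, Router};

#[derive(Clone)]
struct AppState;

// Hypothetical handler standing in for get_database: axum 0.8 binds the
// `{databaseName}` segment to the `Path` extractor by position.
async fn get_database(Path(database_name): Path<String>) -> String {
    format!("database: {database_name}")
}

fn create_router() -> Router<AppState> {
    let metastore_router = Router::new()
        .route("/databases/{databaseName}", get(get_database));
    // Nesting gives every route the /v1/metastore prefix, exactly as the
    // removed create_router did.
    Router::new().nest("/v1/metastore", metastore_router)
}

fn build() -> Router {
    // with_state turns Router<AppState> into a serveable Router<()>.
    create_router().with_state(AppState)
}
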
-pub mod router; -pub mod session; -pub mod state; -pub mod utils; - -#[cfg(test)] -mod tests; - -use super::http::config::WebConfig; -use crate::config::AuthConfig; - -#[allow(clippy::needless_pass_by_value)] -pub fn make_app( - metastore: Arc, - history_store: Arc, - config: &WebConfig, - auth_config: AuthConfig, -) -> Result> { - let execution_cfg = execution::utils::Config::new(&config.data_format)?; - let execution_svc = Arc::new(CoreExecutionService::new(metastore.clone(), execution_cfg)); - let execution_svc = Arc::new(RecordingExecutionService::new( - execution_svc, - history_store.clone(), - )); - - let session_memory = RequestSessionMemory::default(); - let session_store = RequestSessionStore::new(session_memory, execution_svc.clone()); - - tokio::task::spawn( - session_store - .clone() - .continuously_delete_expired(tokio::time::Duration::from_secs(60)), - ); - - let session_layer = SessionManagerLayer::new(session_store) - .with_secure(false) - .with_expiry(Expiry::OnInactivity(Duration::seconds(5 * 60))); - - // Create the application state - let app_state = state::AppState::new( - metastore, - history_store, - execution_svc, - Arc::new(config.clone()), - Arc::new(auth_config), - ); - - let mut app = router::create_app(app_state) - .layer(session_layer) - .layer(TraceLayer::new_for_http()) - .layer(middleware::from_fn(print_request_response)); - - if let Some(allow_origin) = config.allow_origin.as_ref() { - app = app.layer(make_cors_middleware(allow_origin)?); - } - - Ok(app) -} - -#[allow(clippy::as_conversions)] -pub async fn run_app( - app: Router, - config: &WebConfig, - db: Arc, -) -> Result<(), Box> { - let host = config.host.clone(); - let port = config.port; - let listener = tokio::net::TcpListener::bind(format!("{host}:{port}")).await?; - let addr = listener.local_addr()?; - tracing::info!("Listening on http://{}", addr); - axum::serve(listener, app) - .with_graceful_shutdown(shutdown_signal(db)) - .await - .map_err(|e| Box::new(e) as Box) -} - -/// This func will wait for a signal to shutdown the service. -/// It will wait for either a Ctrl+C signal or a SIGTERM signal. -/// -/// # Panics -/// If the function fails to install the signal handler, it will panic. -#[allow( - clippy::expect_used, - clippy::redundant_pub_crate, - clippy::cognitive_complexity -)] -async fn shutdown_signal(db: Arc) { - let ctrl_c = async { - signal::ctrl_c() - .await - .expect("failed to install Ctrl+C handler"); - }; - - #[cfg(unix)] - let terminate = async { - signal::unix::signal(signal::unix::SignalKind::terminate()) - .expect("failed to install signal handler") - .recv() - .await; - }; - - #[cfg(not(unix))] - let terminate = std::future::pending::<()>(); - - tokio::select! 
{ - () = ctrl_c => { - db.close().await.expect("Failed to close database"); - tracing::warn!("Ctrl+C received, starting graceful shutdown"); - }, - () = terminate => { - db.close().await.expect("Failed to close database"); - tracing::warn!("SIGTERM received, starting graceful shutdown"); - }, - } - - tracing::warn!("signal received, starting graceful shutdown"); -} - -async fn print_request_response( - req: Request, - next: Next, -) -> Result { - let (req_parts, req_body) = req.into_parts(); - let method = req_parts.method.to_string(); - let uri = req_parts.uri.to_string(); - let bytes = buffer_and_print("request", &method, &uri, req_body).await?; - let req = Request::from_parts(req_parts, Body::from(bytes)); - let res = next.run(req).await; - - let (resp_parts, resp_body) = res.into_parts(); - let bytes = buffer_and_print("response", &method, &uri, resp_body).await?; - let res = Response::from_parts(resp_parts, Body::from(bytes)); - - Ok(res) -} - -async fn buffer_and_print( - direction: &str, - method: &String, - uri: &String, - body: B, -) -> Result -where - B: axum::body::HttpBody + Send, - B::Error: std::fmt::Display, -{ - let bytes = match body.collect().await { - Ok(collected) => collected.to_bytes(), - Err(err) => { - return Err(( - StatusCode::BAD_REQUEST, - format!("failed to read {direction} body: {err}"), - )); - } - }; - - if let Ok(body) = std::str::from_utf8(&bytes) { - // Skip upload endpoint logs as they can be large - if !uri.contains("upload") && !uri.contains("auth") { - tracing::debug!("{direction} {method} {uri} body = {body}"); - } - } - - Ok(bytes) -} diff --git a/crates/runtime/src/http/router.rs b/crates/runtime/src/http/router.rs deleted file mode 100644 index 5b2c4d53a..000000000 --- a/crates/runtime/src/http/router.rs +++ /dev/null @@ -1,340 +0,0 @@ -use axum::routing::{get, post}; -use axum::{middleware, Json, Router}; -use std::fs; -use tower_http::catch_panic::CatchPanicLayer; -use utoipa::openapi::{self}; -use utoipa::OpenApi; -use utoipa_swagger_ui::SwaggerUi; - -use crate::http::catalog::router::create_router as create_iceberg_router; -use crate::http::dbt::router::create_router as create_dbt_router; -// use crate::http::ui::old_handlers::tables::ApiDoc as TableApiDoc; -use super::metastore::router::create_router as create_metastore_router; -use crate::http::auth::layer::require_auth; -use crate::http::auth::router::create_router as create_auth_router; -use crate::http::state::AppState; -use crate::http::ui::router::{create_router as create_ui_router, ui_open_api_spec}; -use tower_http::timeout::TimeoutLayer; - -// TODO: Fix OpenAPI spec generation -#[derive(OpenApi)] -#[openapi()] -pub struct ApiDoc; - -pub fn create_app(state: AppState) -> Router { - let mut spec = ApiDoc::openapi(); - if let Some(extra_spec) = load_openapi_spec() { - spec = spec.merge_from(extra_spec); - } - - let ui_spec = ui_open_api_spec(); - // if let Some(extra_spec) = load_openapi_spec() { - // ui_spec = ui_spec.merge_from(extra_spec); - // } - let metastore_router = create_metastore_router(); - let ui_router = create_ui_router(); - let dbt_router = create_dbt_router(); - let iceberg_catalog = create_iceberg_router(); - let auth_router = create_auth_router(); - - Router::new() - .merge(metastore_router) - .merge(ui_router) - // middleware wraping all routes above ^^ - .layer(middleware::from_fn_with_state(state.clone(), require_auth)) - .merge(auth_router) - .merge(dbt_router) - .nest("/catalog", iceberg_catalog) - .merge( - SwaggerUi::new("/") - .url("/openapi.json", spec) - 
.url("/ui_openapi.json", ui_spec), - ) - .route("/health", get(|| async { Json("OK") })) - .route("/telemetry/send", post(|| async { Json("OK") })) - .layer(TimeoutLayer::new(std::time::Duration::from_secs(1200))) - .layer(CatchPanicLayer::new()) - //.layer(super::layers::make_cors_middleware(allow_origin.unwrap_or("*".to_string()))) - .with_state(state) -} - -fn load_openapi_spec() -> Option { - let openapi_yaml_content = fs::read_to_string("rest-catalog-open-api.yaml").ok()?; - let mut original_spec = serde_yaml::from_str::(&openapi_yaml_content).ok()?; - // Dropping all paths from the original spec - original_spec.paths = openapi::Paths::new(); - Some(original_spec) -} - -#[cfg(test)] -mod tests { - #![allow(clippy::too_many_lines, clippy::unwrap_used)] - - /*use super::*; - use axum::{ - body::Body, - http::{self, Request, StatusCode}, - }; - use catalog::repository::{DatabaseRepositoryDb, TableRepositoryDb}; - use catalog::service::CatalogImpl; - use control_plane::repository::{StorageProfileRepositoryDb, WarehouseRepositoryDb}; - use control_plane::service::ControlServiceImpl; - use control_plane::utils::Config; - use http_body_util::BodyExt; - // for `collect` - use object_store::{memory::InMemory, path::Path, ObjectStore}; - use serde_json::json; - use slatedb::config::DbOptions; - use slatedb::db::Db as SlateDb; - use std::sync::Arc; - use tempfile::TempDir; - use tower::{Service, ServiceExt}; - use utils::Db; - - lazy_static::lazy_static! { - static ref TEMP_DIR: TempDir = TempDir::new().unwrap(); - } - - async fn create_router() -> Router { - let db = { - let object_store: Arc = Arc::new(InMemory::new()); - let options = DbOptions::default(); - Arc::new(Db::new( - Arc::new(SlateDb::open_with_opts(Path::from("/tmp/test_kv_store"), options, object_store) - .await - .unwrap(), - ))) - }; - - // Initialize the repository and concrete service implementation - let control_svc = { - let storage_profile_repo = StorageProfileRepositoryDb::new(db.clone()); - let warehouse_repo = WarehouseRepositoryDb::new(db.clone()); - let config = Config::new("json"); - ControlServiceImpl::new( - Arc::new(storage_profile_repo), - Arc::new(warehouse_repo), - config, - ) - }; - - let catalog_svc = { - let t_repo = TableRepositoryDb::new(db.clone()); - let db_repo = DatabaseRepositoryDb::new(db); - - CatalogImpl::new(Arc::new(t_repo), Arc::new(db_repo)) - }; - - let app_state = AppState::new(Arc::new(control_svc), Arc::new(catalog_svc)); - create_app(app_state) - } - - #[tokio::test] - async fn test_create_storage_profile() { - let app = create_router().await; - let payload = json!({ - "type": "aws", - "region": "us-west-2", - "bucket": "my-bucket", - "credentials": { - "credential_type": "access_key", - "aws_access_key_id": "my-access-key", - "aws_secret_access_key": "my-secret-access-key" - } - }); - let response = app - .oneshot( - Request::builder() - .uri("/v1/storage-profile") - .method(http::Method::POST) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(), - ) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::OK); - } - - #[tokio::test] - async fn test_create_get_table() { - let mut app = create_router().await.into_service(); - let payload = json!({ - "type": "aws", - "region": "us-west-2", - "bucket": "my-bucket", - "credentials": { - "credential_type": "access_key", - "aws_access_key_id": "my-access-key", - "aws_secret_access_key": "my-secret-access-key" - } - }); - let request = Request::builder() - 
.uri("/v1/storage-profile") - .method(http::Method::POST) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(); - let response = ServiceExt::>::ready(&mut app) - .await - .unwrap() - .call(request) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::OK); - let body = response.into_body().collect().await.unwrap().to_bytes(); - let sid = serde_json::from_slice::(&body) - .unwrap() - .id; - - // Now create warehouse - let payload = json!({ - "name": "my-warehouse", - "storage_profile_id": sid, - "prefix": "my-prefix", - }); - let request = Request::builder() - .uri("/v1/warehouse") - .method(http::Method::POST) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(); - let response = ServiceExt::>::ready(&mut app) - .await - .unwrap() - .call(request) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::OK); - - let body = response.into_body().collect().await.unwrap().to_bytes(); - let wid = serde_json::from_slice::(&body).unwrap().id; - - // Now create namespace - let payload = json!({ - "namespace": ["my-namespace"], - "properties": { - "key": "value" - } - }); - let request = Request::builder() - .uri(format!("/catalog/v1/{wid}/namespaces")) - .method(http::Method::POST) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(); - let response = ServiceExt::>::ready(&mut app) - .await - .unwrap() - .call(request) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::OK); - let body = response.into_body().collect().await.unwrap().to_bytes(); - - let namespace_id = serde_json::from_slice::(&body) - .unwrap() - .namespace; - let namespace_id = namespace_id.inner().first().unwrap().clone(); - - // Now get namespace - let request = Request::builder() - .uri(format!("/catalog/v1/{wid}/namespaces/{namespace_id}")) - .method(http::Method::GET) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::empty()) - .unwrap(); - let response = ServiceExt::>::ready(&mut app) - .await - .unwrap() - .call(request) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::OK); - let body = response.into_body().collect().await.unwrap().to_bytes(); - let namespace_id = serde_json::from_slice::(&body) - .unwrap() - .namespace; - - assert_eq!(namespace_id.inner(), vec!["my-namespace"]); - - // Now let's create table - let payload = json!({ - "name": "my-table", - "type": "struct", - "schema": { - "schema-id": 1, - "type": "struct", - "fields": [ - { - "id": 1, - "name": "id", - "type": "int", - "required": true - }, - { - "id": 2, - "name": "name", - "type": "string", - "required": true - } - ], - "identifier-field-ids": [1] - }, - }); - let request = Request::builder() - .uri(format!("/catalog/v1/{wid}/namespace/my-namespace/table")) - .method(http::Method::POST) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(); - let _response = ServiceExt::>::ready(&mut app) - .await - .unwrap() - .call(request) - .await - .unwrap(); - - //println!("{:?}", response.into_body().collect().await.unwrap()); - // assert_eq!(response.status(), StatusCode::OK); - } - - #[tokio::test] - #[should_panic(expected = "not implemented")] - async fn test_error_handling() { - panic!("not implemented"); - - /*let app = create_router().await; - - // Mock service that returns an 
error - let payload = json!({ - "type": "aws", - "region": "us-west-2", - "bucket": "my-bucket", - "credentials": { - "credential_type": "access_key", - "aws_access_key_id": "wrong-access-key", - "aws_secret_access_key": "wrong-secret-access" - } - }); - - let response = app - .oneshot( - Request::builder() - .uri("/v1/storage-profile") - .method(http::Method::POST) - .header(http::header::CONTENT_TYPE, "application/json") - .body(Body::from(serde_json::to_vec(&payload).unwrap())) - .unwrap(), - ) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::BAD_REQUEST);*/ - }*/ -} diff --git a/crates/runtime/src/http/tests/router_tests.rs b/crates/runtime/src/http/tests/router_tests.rs deleted file mode 100644 index 6babf459f..000000000 --- a/crates/runtime/src/http/tests/router_tests.rs +++ /dev/null @@ -1,27 +0,0 @@ -use axum::Router; -use nexus::repository::InMemoryStorageProfileRepository; -use nexus::router::create_app; -use nexus::service::StorageProfileServiceImpl; -use nexus::state::AppState; - -fn create_app() -> Router { - let repository = Arc::new(InMemoryStorageProfileRepository::new()); - let storage_profile_service = Arc::new(StorageProfileServiceImpl::new(repository)); - let app_state = AppState::new(storage_profile_service); - create_app(app_state) -} - -#[tokio::test] -async fn test_create_storage_profile() { - let response = app - .oneshot( - Request::builder() - .uri("/v1/storage-profile") - .body(Body::empty()) - .unwrap(), - ) - .await - .unwrap(); - - assert_eq!(response.status(), StatusCode::OK); -} diff --git a/crates/runtime/src/http/tests/ui_tests.postman_collection.json b/crates/runtime/src/http/tests/ui_tests.postman_collection.json deleted file mode 100644 index 534f1ab55..000000000 --- a/crates/runtime/src/http/tests/ui_tests.postman_collection.json +++ /dev/null @@ -1,672 +0,0 @@ -{ - "info": { - "_postman_id": "e9a79523-263d-4f5c-8289-c7b38c4d854b", - "name": "UI Tests", - "schema": "https://schema.getpostman.com/json/collection/v2.1.0/collection.json", - "_exporter_id": "39228746" - }, - "item": [ - { - "name": "Process a VALID transaction", - "item": [ - { - "name": "Create storage-profile", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Successful POST request\", function () {", - " pm.expect(pm.response.code).to.be.oneOf([200, 201]);", - " pm.collectionVariables.set(\"storage_profile_id\", pm.response.json().id)", - "});", - "" - ], - "type": "text/javascript", - "packages": {} - } - }, - { - "listen": "prerequest", - "script": { - "exec": [ - "" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "POST", - "header": [], - "body": { - "mode": "raw", - "raw": "{\n \"bucket\": \"artem_test\",\n \"credentials\": {\n \"awsAccessKeyId\": \"123\",\n \"awsSecretAccessKey\": \"123\"\n },\n \"region\": \"us-east-2\",\n \"endpoint\": \"\",\n \"stsRoleArn\": \"\",\n \"type\": \"aws\"\n}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{rust_url}}/ui/storage-profiles", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "storage-profiles" - ] - }, - "description": "This is a POST request, submitting data to an API via the request body. This request submits JSON data, and the data is reflected in the response.\n\nA successful POST request typically returns a `200 OK` or `201 Created` response code." 
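
The commented-out tests above drive the app in-process with tower's ServiceExt::oneshot instead of binding a socket. That pattern stands on its own; a minimal sketch, assuming axum 0.8 and tower with its util feature (the /health route mirrors the one registered in the removed create_app):

use axum::{body::Body, http::{Request, StatusCode}, routing::get, Json, Router};
use tower::ServiceExt; // brings `oneshot` into scope

#[tokio::test]
async fn health_returns_ok() {
    // Same shape as the /health route registered in the removed create_app.
    let app = Router::new().route("/health", get(|| async { Json("OK") }));

    // `oneshot` drives a single request through the service in-process,
    // with no TCP listener involved.
    let response = app
        .oneshot(Request::builder().uri("/health").body(Body::empty()).unwrap())
        .await
        .unwrap();

    assert_eq!(response.status(), StatusCode::OK);
}
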
- }, - "response": [] - }, - { - "name": "Get storage-profile", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/storage-profiles/{{storage_profile_id}}", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "storage-profiles", - "{{storage_profile_id}}" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Get storage-profile error", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/storage-profiles/00000000-a59e-4031-951f-6204466fb70f", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "storage-profiles", - "00000000-a59e-4031-951f-6204466fb70f" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Get storage-profiles", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/storage-profiles", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "storage-profiles" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Navigation", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/navigation", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "navigation" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. 
There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Create warehouse", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Successful POST request\", function () {", - " pm.expect(pm.response.code).to.be.oneOf([200, 201]);", - " pm.collectionVariables.set(\"wh_id\", pm.response.json().id)", - "});", - "" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "POST", - "header": [], - "body": { - "mode": "raw", - "raw": "{\n \"storageProfileId\": \"{{storage_profile_id}}\",\n \"name\": \"name\",\n \"keyPrefix\": \"test\"\n}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{rust_url}}/ui/warehouses", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses" - ] - }, - "description": "This is a POST request, submitting data to an API via the request body. This request submits JSON data, and the data is reflected in the response.\n\nA successful POST request typically returns a `200 OK` or `201 Created` response code." - }, - "response": [] - }, - { - "name": "Create warehouse error", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Successful POST request\", function () {", - " pm.expect(pm.response.code).to.be.oneOf([200, 201]);", - "});", - "" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "POST", - "header": [], - "body": { - "mode": "raw", - "raw": "{\n \"storage_profile_id\": \"00000000-9560-42db-ab5a-d3e14b5ed300\",\n \"name\": \"name\",\n \"key_prefix\": \"test\"\n}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{rust_url}}/ui/warehouses", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses" - ] - }, - "description": "This is a POST request, submitting data to an API via the request body. This request submits JSON data, and the data is reflected in the response.\n\nA successful POST request typically returns a `200 OK` or `201 Created` response code." - }, - "response": [] - }, - { - "name": "Get warehouses", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/warehouses", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." 
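
The request bodies in this collection consistently use camelCase keys (awsAccessKeyId, storageProfileId, keyPrefix). On the Rust side a serde mirror would bridge those to snake_case fields; the sketch below is hedged, with struct names and optional fields guessed from the JSON shown here rather than taken from the server's real models:

use serde::{Deserialize, Serialize};

// Hypothetical mirror of the storage-profile body; rename_all maps Rust
// snake_case fields onto the camelCase keys the collection sends.
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct AwsCredentialsPayload {
    aws_access_key_id: String,
    aws_secret_access_key: String,
}

#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
struct StorageProfilePayload {
    #[serde(rename = "type")]
    kind: String,
    region: String,
    bucket: String,
    credentials: AwsCredentialsPayload,
    #[serde(default)]
    endpoint: Option<String>,
    #[serde(default)]
    sts_role_arn: Option<String>,
}

fn parse(raw: &str) -> serde_json::Result<StorageProfilePayload> {
    serde_json::from_str(raw)
}
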
- }, - "response": [] - }, - { - "name": "Get warehouse", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/warehouses/{{wh_id}}", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses", - "{{wh_id}}" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Create database", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Successful POST request\", function () {", - " pm.expect(pm.response.code).to.be.oneOf([200, 201]);", - " pm.collectionVariables.set(\"dn_name\", pm.response.json().name)", - "});", - "" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "POST", - "header": [], - "body": { - "mode": "raw", - "raw": " { \n \"name\": \"name2\",\n \"properties\" : {\"testId\": \"eqqq\"}\n}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{rust_url}}/ui/warehouses/{{wh_id}}/databases", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses", - "{{wh_id}}", - "databases" - ] - }, - "description": "This is a POST request, submitting data to an API via the request body. This request submits JSON data, and the data is reflected in the response.\n\nA successful POST request typically returns a `200 OK` or `201 Created` response code." - }, - "response": [] - }, - { - "name": "Get database", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/warehouses/{{wh_id}}/databases/{{dn_name}}", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses", - "{{wh_id}}", - "databases", - "{{dn_name}}" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Get database error", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Status code is 200\", function () {", - " pm.response.to.have.status(200);", - "});" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "GET", - "header": [], - "url": { - "raw": "{{rust_url}}/ui/warehouses/{{wh_id}}/databases/test_error", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses", - "{{wh_id}}", - "databases", - "test_error" - ] - }, - "description": "This is a GET request and it is used to \"get\" data from an endpoint. 
There is no request body for a GET request, but you can use query parameters to help specify the resource you want data on (e.g., in this request, we have `id=1`).\n\nA successful GET response will have a `200 OK` status, and should include some kind of response body - for example, HTML web content or JSON data." - }, - "response": [] - }, - { - "name": "Create table", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Successful POST request\", function () {", - " pm.expect(pm.response.code).to.be.oneOf([200, 201]);", - " pm.collectionVariables.set(\"table_name\", pm.response.json().name)", - "});", - "" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "request": { - "method": "POST", - "header": [], - "body": { - "mode": "raw", - "raw": "{\n \"name\": \"table_data_2\",\n \"location\": \"/home/iceberg/warehouse/nyc/taxis\",\n \"schema\": {\n \"type\": \"struct\",\n \"schemaId\": 0,\n \"fields\": [\n {\n \"id\": 1,\n \"name\": \"vendor_id\",\n \"required\": false,\n \"type\": \"long\"\n },\n {\n \"id\": 2,\n \"name\": \"trip_id\",\n \"required\": false,\n \"type\": \"long\"\n },\n {\n \"id\": 3,\n \"name\": \"trip_distance\",\n \"required\": false,\n \"type\": \"float\"\n },\n {\n \"id\": 4,\n \"name\": \"fare_amount\",\n \"required\": false,\n \"type\": \"double\"\n },\n {\n \"id\": 5,\n \"name\": \"store_and_fwd_flag\",\n \"required\": false,\n \"type\": \"string\"\n }\n ]\n },\n \"partitionSpec\": {\n \"spec_id\": 1,\n \"fields\": [\n {\n \"name\": \"vendor_id\",\n \"transform\": \"identity\",\n \"source-id\": 1,\n \"field-id\": 1000\n }\n ]\n },\n \"sortOrders\": [\n {\n \"orderId\": 0,\n \"fields\": [\n {\n \"direction\": \"asc\",\n \"transform\": \"identity\",\n \"source-id\": 1,\n \"null-order\": \"nulls-first\"\n }\n ]\n }\n ],\n \"properties\": {\n \"owner\": \"root\"\n }\n}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{rust_url}}/ui/warehouses/{{wh_id}}/databases/{{dn_name}}/tables", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses", - "{{wh_id}}", - "databases", - "{{dn_name}}", - "tables" - ] - }, - "description": "This is a POST request, submitting data to an API via the request body. This request submits JSON data, and the data is reflected in the response.\n\nA successful POST request typically returns a `200 OK` or `201 Created` response code." - }, - "response": [] - }, - { - "name": "Get table", - "event": [ - { - "listen": "test", - "script": { - "exec": [ - "pm.test(\"Successful POST request\", function () {", - " pm.expect(pm.response.code).to.be.oneOf([200, 201]);", - "});", - "" - ], - "type": "text/javascript", - "packages": {} - } - } - ], - "protocolProfileBehavior": { - "disableBodyPruning": true - }, - "request": { - "method": "GET", - "header": [], - "body": { - "mode": "raw", - "raw": " { \n \"warehouse_id\": \"0c648454-00fe-44a9-a729-270c3e000cff\",\n \"name\": \"name\",\n \"properties\" : {\"test\": \"eqqq\"}\n}", - "options": { - "raw": { - "language": "json" - } - } - }, - "url": { - "raw": "{{rust_url}}/ui/warehouses/{{wh_id}}/databases/{{dn_name}}/tables/{{table_name}}", - "host": [ - "{{rust_url}}" - ], - "path": [ - "ui", - "warehouses", - "{{wh_id}}", - "databases", - "{{dn_name}}", - "tables", - "{{table_name}}" - ] - }, - "description": "This is a POST request, submitting data to an API via the request body. 
This request submits JSON data, and the data is reflected in the response.\n\nA successful POST request typically returns a `200 OK` or `201 Created` response code." - }, - "response": [] - } - ], - "description": "This folder tests a commonly used workflow of creating new accounts and transferring funds between the two.\n\nBecause we expect all requests to succeed in this workflow, we have a **status check test** in the **folder-level Scripts tab**.", - "event": [ - { - "listen": "prerequest", - "script": { - "type": "text/javascript", - "exec": [ - "" - ] - } - }, - { - "listen": "test", - "script": { - "type": "text/javascript", - "exec": [ - "//all requests in this folder should be valid", - "pm.test(\"Folder-level status check\", function () {", - " pm.response.to.have.status(200);", - "});" - ] - } - } - ] - } - ], - "auth": { - "type": "apikey", - "apikey": [ - { - "key": "key", - "value": "api-key", - "type": "string" - }, - { - "key": "value", - "value": "{{apiKey}}", - "type": "string" - } - ] - }, - "event": [ - { - "listen": "prerequest", - "script": { - "type": "text/javascript", - "exec": [ - "" - ] - } - }, - { - "listen": "test", - "script": { - "type": "text/javascript", - "exec": [ - "" - ] - } - } - ], - "variable": [ - { - "key": "baseUrl", - "value": "https://template.postman-echo.com", - "type": "string" - }, - { - "key": "apiKey", - "value": "OMpqVWAH.UC80wyXTtPwhDgAUdCTx6" - }, - { - "key": "storage_profile_id", - "value": "" - }, - { - "key": "sp_id", - "value": "" - }, - { - "key": "wh_id", - "value": "" - }, - { - "key": "db_id", - "value": "" - }, - { - "key": "db_name", - "value": "" - }, - { - "key": "table_name", - "value": "" - }, - { - "key": "dn_name", - "value": "" - } - ] -} \ No newline at end of file diff --git a/crates/runtime/src/http/tests/web_assets.rs b/crates/runtime/src/http/tests/web_assets.rs deleted file mode 100644 index 8466c40ec..000000000 --- a/crates/runtime/src/http/tests/web_assets.rs +++ /dev/null @@ -1,90 +0,0 @@ -use crate::http::web_assets::{config::StaticWebConfig, run_web_assets_server}; -use http::Method; -use reqwest; -use reqwest::header; - -#[allow(clippy::expect_used)] -#[tokio::test] -async fn test_web_assets_server() { - let addr = run_web_assets_server(&StaticWebConfig { - host: "0.0.0.0".to_string(), - port: 0, - allow_origin: None, - }) - .await; - - assert!(addr.is_ok()); - - let client = reqwest::Client::new(); - let addr = addr.expect("Failed to run web assets server"); - let res = client - .request(Method::GET, format!("http://{addr}/index.html")) - .send() - .await - .expect("Failed to send request to web assets server"); - - assert_eq!(http::StatusCode::OK, res.status()); - - let content_length = res - .headers() - .get(header::CONTENT_LENGTH) - .expect("Content-Length header not found") - .to_str() - .expect("Failed to get str from Content-Length header") - .parse::() - .expect("Failed to parse Content-Length header"); - - assert!(content_length > 0); -} - -#[allow(clippy::expect_used)] -#[tokio::test] -async fn test_web_assets_server_redirect() { - let addr = run_web_assets_server(&StaticWebConfig { - host: "0.0.0.0".to_string(), - port: 0, - allow_origin: None, - }) - .await; - - assert!(addr.is_ok()); - - let client = reqwest::Client::builder() - .redirect(reqwest::redirect::Policy::none()) - .build() - .expect("Failed to build client for redirect"); - - let addr = addr.expect("Failed to run web assets server"); - let res = client - .request(Method::GET, format!("http://{addr}/deadbeaf")) - .send() - .await - 
.expect("Failed to send request to web assets server"); - - assert_eq!(http::StatusCode::SEE_OTHER, res.status()); - - let redirect = res - .headers() - .get(header::LOCATION) - .expect("Location header not found") - .to_str() - .expect("Failed to get str from Location header"); - assert_eq!(redirect, "/index.html"); - - // redirect from root to index.html - let res = client - .request(Method::GET, format!("http://{addr}/")) - .send() - .await - .expect("Failed to send request to web assets server"); - - assert_eq!(http::StatusCode::SEE_OTHER, res.status()); - - let redirect = res - .headers() - .get(header::LOCATION) - .expect("Location header not found") - .to_str() - .expect("Failed to get str from Location header"); - assert_eq!(redirect, "/index.html"); -} diff --git a/crates/runtime/src/http/ui/error.rs b/crates/runtime/src/http/ui/error.rs deleted file mode 100644 index a25f1dd28..000000000 --- a/crates/runtime/src/http/ui/error.rs +++ /dev/null @@ -1,44 +0,0 @@ -use crate::http::metastore::error::MetastoreAPIError; -use axum::response::{IntoResponse, Response}; -use http::StatusCode; -use snafu::prelude::*; - -#[derive(Snafu, Debug)] -#[snafu(visibility(pub))] -pub enum UIError { - #[snafu(transparent)] - Execution { - source: crate::execution::error::ExecutionError, - }, - #[snafu(transparent)] - Metastore { - source: embucket_metastore::error::MetastoreError, - }, -} -pub type UIResult = Result; - -pub(crate) trait IntoStatusCode { - fn status_code(&self) -> StatusCode; -} - -// #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] -// #[serde(rename_all = "camelCase")] -// pub(crate) struct UIResponse { -// #[serde(flatten)] -// pub(crate) data: T, -// } -// -// impl UIResponse { -// pub const fn from(data: T) -> Json { -// Json(Self { data }) -// } -// } - -impl IntoResponse for UIError { - fn into_response(self) -> Response { - match self { - Self::Execution { source } => source.into_response(), - Self::Metastore { source } => MetastoreAPIError(source).into_response(), - } - } -} diff --git a/crates/runtime/src/http/utils.rs b/crates/runtime/src/http/utils.rs deleted file mode 100644 index ffe9d831b..000000000 --- a/crates/runtime/src/http/utils.rs +++ /dev/null @@ -1,17 +0,0 @@ -use chrono::Utc; -use std::collections::HashMap; - -#[allow(clippy::implicit_hasher)] -pub fn update_properties_timestamps(properties: &mut HashMap) { - let utc_now = Utc::now(); - let utc_now_str = utc_now.to_rfc3339(); - properties.insert("created_at".to_string(), utc_now_str.clone()); - properties.insert("updated_at".to_string(), utc_now_str); -} - -#[must_use] -pub fn get_default_properties() -> HashMap { - let mut properties = HashMap::new(); - update_properties_timestamps(&mut properties); - properties -} diff --git a/crates/runtime/src/http/web_assets/server.rs b/crates/runtime/src/http/web_assets/server.rs deleted file mode 100644 index 70f0d56a1..000000000 --- a/crates/runtime/src/http/web_assets/server.rs +++ /dev/null @@ -1,42 +0,0 @@ -use super::config::StaticWebConfig; -use super::handler::WEB_ASSETS_MOUNT_PATH; -use super::handler::{root_handler, tar_handler}; -use crate::http::layers::make_cors_middleware; -use axum::{routing::get, Router}; -use core::net::SocketAddr; -use tower_http::trace::TraceLayer; - -#[allow(clippy::unwrap_used, clippy::as_conversions)] -pub async fn run_web_assets_server( - config: &StaticWebConfig, -) -> Result> { - let StaticWebConfig { - host, - port, - allow_origin, - } = config; - - let mut app = Router::new() - 
.route(WEB_ASSETS_MOUNT_PATH, get(root_handler)) - .route( - format!("{WEB_ASSETS_MOUNT_PATH}{{*path}}").as_str(), - get(tar_handler), - ) - .layer(TraceLayer::new_for_http()); - - if let Some(allow_origin) = allow_origin.as_ref() { - app = app.layer(make_cors_middleware(allow_origin)?); - } - - let listener = tokio::net::TcpListener::bind(format!("{host}:{port}")).await?; - let addr = listener.local_addr()?; - tracing::info!("Listening on http://{}", addr); - - tokio::spawn(async move { - axum::serve(listener, app) - // .with_graceful_shutdown(shutdown_signal()) - .await - }); - - Ok(addr) -} diff --git a/crates/runtime/src/lib.rs b/crates/runtime/src/lib.rs deleted file mode 100644 index fedc33f4c..000000000 --- a/crates/runtime/src/lib.rs +++ /dev/null @@ -1,46 +0,0 @@ -use std::sync::Arc; - -use config::AuthConfig; -use config::RuntimeConfig; -use embucket_history::store::SlateDBWorksheetsStore; -use embucket_metastore::SlateDBMetastore; -use embucket_utils::Db; -use http::web_assets::run_web_assets_server; -use http::{make_app, run_app}; -use object_store::{path::Path, ObjectStore}; -use slatedb::{config::DbOptions, Db as SlateDb}; - -pub mod config; -pub mod execution; -pub mod http; - -#[cfg(test)] -pub(crate) mod tests; - -#[allow(clippy::unwrap_used, clippy::as_conversions)] -pub async fn run_binary( - state_store: Arc, - config: RuntimeConfig, - auth_config: AuthConfig, -) -> Result<(), Box> { - let db = { - let options = DbOptions::default(); - Db::new(Arc::new( - SlateDb::open_with_opts( - Path::from(config.db.slatedb_prefix.clone()), - options, - state_store.clone(), - ) - .await - .map_err(Box::new)?, - )) - }; - - let metastore = Arc::new(SlateDBMetastore::new(db.clone())); - let history_store = Arc::new(SlateDBWorksheetsStore::new(db.clone())); - let app = make_app(metastore, history_store, &config.web, auth_config)?; - - let _ = run_web_assets_server(&config.web_assets).await?; - - run_app(app, &config.web, Arc::new(db)).await -} diff --git a/crates/runtime/src/tests/mod.rs b/crates/runtime/src/tests/mod.rs deleted file mode 100644 index 851748d17..000000000 --- a/crates/runtime/src/tests/mod.rs +++ /dev/null @@ -1,7 +0,0 @@ -mod auth; -mod queries; -pub mod server; -mod session; -pub mod utils; - -pub use server::*; diff --git a/crates/runtime/src/tests/queries.rs b/crates/runtime/src/tests/queries.rs deleted file mode 100644 index 3d0f669c7..000000000 --- a/crates/runtime/src/tests/queries.rs +++ /dev/null @@ -1,24 +0,0 @@ -use crate::tests::utils::macros::test_query; - -test_query!(select_date_add_diff, "SELECT dateadd(day, 5, '2025-06-01')"); -test_query!(func_date_add, "SELECT date_add(day, 30, '2025-01-06')"); -// // SELECT -test_query!(select_star, "SELECT * FROM employee_table"); -// FIXME: ILIKE is not supported yet -// test_query!(select_ilike, "SELECT * ILIKE '%id%' FROM employee_table;"); -test_query!( - select_exclude, - "SELECT * EXCLUDE department_id FROM employee_table;" -); -test_query!( - select_exclude_multiple, - "SELECT * EXCLUDE (department_id, employee_id) FROM employee_table;" -); - -test_query!( - qualify, - "SELECT product_id, retail_price, quantity, city - FROM sales - QUALIFY ROW_NUMBER() OVER (PARTITION BY city ORDER BY retail_price) = 1 - ;" -); diff --git a/crates/runtime/src/tests/server.rs b/crates/runtime/src/tests/server.rs deleted file mode 100644 index faa461318..000000000 --- a/crates/runtime/src/tests/server.rs +++ /dev/null @@ -1,50 +0,0 @@ -use crate::{ - http::{config::WebConfig, make_app}, - AuthConfig, -}; -use 
embucket_history::store::SlateDBWorksheetsStore; -use embucket_metastore::SlateDBMetastore; -use embucket_utils::Db; -use std::net::SocketAddr; -use std::sync::Arc; - -#[allow(clippy::unwrap_used)] -pub async fn run_test_server_with_demo_auth( - jwt_secret: String, - demo_user: String, - demo_password: String, -) -> SocketAddr { - let listener = tokio::net::TcpListener::bind("0.0.0.0:0").await.unwrap(); - let addr = listener.local_addr().unwrap(); - - let db = Db::memory().await; - let metastore = Arc::new(SlateDBMetastore::new(db.clone())); - let history = Arc::new(SlateDBWorksheetsStore::new(db)); - let mut auth_config = AuthConfig::new(jwt_secret); - auth_config.with_demo_credentials(demo_user, demo_password); - - let app = make_app( - metastore, - history, - &WebConfig { - port: 3000, - host: "0.0.0.0".to_string(), - allow_origin: None, - data_format: "json".to_string(), - iceberg_catalog_url: "http://127.0.0.1".to_string(), - }, - auth_config, - ) - .unwrap(); - - tokio::spawn(async move { - axum::serve(listener, app).await.unwrap(); - }); - - addr -} - -#[allow(clippy::unwrap_used)] -pub async fn run_test_server() -> SocketAddr { - run_test_server_with_demo_auth(String::new(), String::new(), String::new()).await -} diff --git a/crates/runtime/src/tests/session.rs b/crates/runtime/src/tests/session.rs deleted file mode 100644 index 39e76cb2e..000000000 --- a/crates/runtime/src/tests/session.rs +++ /dev/null @@ -1,98 +0,0 @@ -use std::sync::Arc; - -use embucket_metastore::{ - Database as MetastoreDatabase, Metastore, Schema as MetastoreSchema, - SchemaIdent as MetastoreSchemaIdent, SlateDBMetastore, Volume as MetastoreVolume, -}; - -use crate::execution::{query::QueryContext, session::UserSession}; - -#[tokio::test] -#[allow(clippy::expect_used, clippy::manual_let_else, clippy::too_many_lines)] -async fn test_create_table_and_insert() { - let metastore = SlateDBMetastore::new_in_memory().await; - metastore - .create_volume( - &"test_volume".to_string(), - MetastoreVolume::new( - "test_volume".to_string(), - embucket_metastore::VolumeType::Memory, - ), - ) - .await - .expect("Failed to create volume"); - metastore - .create_database( - &"benchmark".to_string(), - MetastoreDatabase { - ident: "benchmark".to_string(), - properties: None, - volume: "test_volume".to_string(), - }, - ) - .await - .expect("Failed to create database"); - let schema_ident = MetastoreSchemaIdent { - database: "benchmark".to_string(), - schema: "public".to_string(), - }; - metastore - .create_schema( - &schema_ident.clone(), - MetastoreSchema { - ident: schema_ident, - properties: None, - }, - ) - .await - .expect("Failed to create schema"); - let session = Arc::new( - UserSession::new(metastore) - .await - .expect("Failed to create user session"), - ); - let create_query = r" - CREATE TABLE benchmark.public.hits - ( - WatchID BIGINT NOT NULL, - JavaEnable INTEGER NOT NULL, - Title TEXT NOT NULL, - GoodEvent INTEGER NOT NULL, - EventTime BIGINT NOT NULL, - EventDate INTEGER NOT NULL, - CounterID INTEGER NOT NULL, - ClientIP INTEGER NOT NULL, - PRIMARY KEY (CounterID, EventDate, EventTime, WatchID) - ); - "; - let mut query1 = session.query(create_query, QueryContext::default()); - - let statement = query1.parse_query().expect("Failed to parse query"); - let result = query1.execute().await.expect("Failed to execute query"); - - let all_query = session - .query("SHOW TABLES", QueryContext::default()) - .execute() - .await - .expect("Failed to execute query"); - - let insert_query = session - .query( - "INSERT INTO 
benchmark.public.hits VALUES (1, 1, 'test', 1, 1, 1, 1, 1)", - QueryContext::default(), - ) - .execute() - .await - .expect("Failed to execute query"); - - let select_query = session - .query( - "SELECT * FROM benchmark.public.hits", - QueryContext::default(), - ) - .execute() - .await - .expect("Failed to execute query"); - - insta::assert_debug_snapshot!((statement, result, all_query, insert_query, select_query)); -} diff --git a/crates/runtime/src/tests/utils.rs b/crates/runtime/src/tests/utils.rs deleted file mode 100644 index e161874eb..000000000 --- a/crates/runtime/src/tests/utils.rs +++ /dev/null @@ -1,96 +0,0 @@ -use std::sync::Arc; - -use crate::execution::{query::QueryContext, session::UserSession}; -use embucket_metastore::{ - Database as MetastoreDatabase, Metastore, Schema as MetastoreSchema, - SchemaIdent as MetastoreSchemaIdent, SlateDBMetastore, Volume as MetastoreVolume, -}; - -static TABLE_SETUP: &str = include_str!(r"./queries/table_setup.sql"); - -#[allow(clippy::unwrap_used, clippy::expect_used)] -pub async fn create_df_session() -> Arc { - let metastore = SlateDBMetastore::new_in_memory().await; - metastore - .create_volume( - &"test_volume".to_string(), - MetastoreVolume::new( - "test_volume".to_string(), - embucket_metastore::VolumeType::Memory, - ), - ) - .await - .expect("Failed to create volume"); - metastore - .create_database( - &"embucket".to_string(), - MetastoreDatabase { - ident: "embucket".to_string(), - properties: None, - volume: "test_volume".to_string(), - }, - ) - .await - .expect("Failed to create database"); - let schema_ident = MetastoreSchemaIdent { - database: "embucket".to_string(), - schema: "public".to_string(), - }; - metastore - .create_schema( - &schema_ident.clone(), - MetastoreSchema { - ident: schema_ident, - properties: None, - }, - ) - .await - .expect("Failed to create schema"); - - let user_session = Arc::new( - UserSession::new(metastore) - .await - .expect("Failed to create user session"), - ); - - for query in TABLE_SETUP.split(';') { - if !query.is_empty() { - let mut query = user_session.query(query, QueryContext::default()); - query.execute().await.unwrap(); - //ctx.sql(query).await.unwrap().collect().await.unwrap(); - } - } - user_session -} - -pub mod macros { - macro_rules! test_query { - ($test_fn_name:ident, $query:expr) => { - paste::paste! 
{ - #[tokio::test] - async fn [< query_ $test_fn_name >]() { - let ctx = crate::tests::utils::create_df_session().await; - - let mut query = ctx.query($query, crate::execution::query::QueryContext::default()); - let res = query.execute().await; - insta::with_settings!({ - description => stringify!($query), - omit_expression => true, - prepend_module_to_snapshot => false - }, { - let df = match res { - Ok(record_batches) => { - Ok(datafusion::arrow::util::pretty::pretty_format_batches(&record_batches).unwrap().to_string()) - }, - Err(e) => Err(format!("Error: {e}")) - }; - let df = df.map(|df| df.split("\n").map(|s| s.to_string()).collect::>()); - insta::assert_debug_snapshot!((df)); - }) - } - } - } - } - - pub(crate) use test_query; -} diff --git a/crates/utils/src/snapshots/utils__test__db.snap b/crates/utils/src/snapshots/utils__test__db.snap deleted file mode 100644 index bd9e904f8..000000000 --- a/crates/utils/src/snapshots/utils__test__db.snap +++ /dev/null @@ -1,29 +0,0 @@ ---- -source: crates/utils/src/lib.rs -expression: "(get_empty, get_after_put, get_after_delete, list_after_append,\nlist_after_remove)" -snapshot_kind: text ---- -( - Ok( - None, - ), - Ok( - Some( - TestEntity { - id: 1, - name: "test", - }, - ), - ), - Ok( - None, - ), - Ok( - [ - "test", - ], - ), - Ok( - [], - ), -) diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 000000000..c5cf55d45 --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1 @@ +style_edition = "2024" \ No newline at end of file
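
Both the deleted test_query! macro and the .snap file above lean on insta's workflow: render a value to text, record it once, and let later runs diff against the stored snapshot. A minimal self-contained sketch using the same with_settings! keys the macro passed; the vector stands in for pretty_format_batches output, and insta is assumed as a dev-dependency:

// Minimal insta snapshot test mirroring the removed macro's settings.
#[test]
fn snapshot_example() {
    let rendered = vec!["+----+", "| id |", "+----+"];
    insta::with_settings!({
        description => "stand-in for a formatted record batch",
        omit_expression => true,
        prepend_module_to_snapshot => false
    }, {
        // First run writes the snapshot; later runs diff against it.
        insta::assert_debug_snapshot!(rendered);
    });
}
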